On Thu, Feb 5, 2015 at 11:06 PM, Syed, Rahila <rahila.s...@nttdata.com> wrote:
>>/*
>>+    * We recheck the actual size even if pglz_compress() report success,
>>+    * because it might be satisfied with having saved as little as one byte
>>+    * in the compressed data.
>>+    */
>>+   *len = (uint16) compressed_len;
>>+   if (*len >= orig_len - 1)
>>+       return false;
>>+   return true;
>>+}
>
> As per latest code ,when compression is 'on' we introduce additional 2 bytes 
> in the header of each block image for storing raw_length of the compressed 
> block.
> In order to achieve compression while accounting for these two additional 
> bytes, we must ensure that compressed length is less than original length - 2.
> So , IIUC the above condition should rather be
>
> If (*len >= orig_len -2 )
>                 return false;
> return true;
> The attached patch contains this. It also has a cosmetic change-  renaming 
> compressBuf to uncompressBuf as it is used to store uncompressed page.

Agreed on both things.

Just looking at your latest patch after some time to let it cool down,
I noticed a couple of things.

 #define MaxSizeOfXLogRecordBlockHeader \
     (SizeOfXLogRecordBlockHeader + \
-     SizeOfXLogRecordBlockImageHeader + \
+     SizeOfXLogRecordBlockImageHeader, \
+     SizeOfXLogRecordBlockImageCompressionInfo + \
There is a comma here instead of a plus sign. We should really sum up
all those sizes to evaluate the maximum size of a block header.

+     * Permanently allocate readBuf uncompressBuf.  We do it this way,
+     * rather than just making a static array, for two reasons:
This comment reads a bit weirdly; "readBuf AND uncompressBuf" is more appropriate.

+     * We recheck the actual size even if pglz_compress() report success,
+     * because it might be satisfied with having saved as little as one byte
+     * in the compressed data. We add two bytes to store raw_length  with the
+     * compressed image. So for compression to be effective
compressed_len should
+     * be atleast < orig_len - 2
This comment block should be reworked, and is missing a period at its end. I
rewrote it as follows; hopefully that's clearer:
+       /*
+        * We recheck the actual size even if pglz_compress() reports
success and see
+        * if at least 2 bytes of length have been saved, as this
corresponds to the
+        * additional amount of data stored in WAL record for a compressed block
+        * via raw_length.
+        */

In any case, those things have been introduced by what I did in
previous versions... And attached is a new patch.
-- 
Michael
From 2b0c54bc7c4bfb2494cf8b0394d56635b01d7c5a Mon Sep 17 00:00:00 2001
From: Michael Paquier <mich...@otacoo.com>
Date: Tue, 25 Nov 2014 14:24:26 +0900
Subject: [PATCH] Support compression for full-page writes in WAL

Compression is controlled with a new parameter called wal_compression.
This parameter can be changed at session level to control WAL compression.
---
 contrib/pg_xlogdump/pg_xlogdump.c             |  17 ++-
 doc/src/sgml/config.sgml                      |  29 +++++
 src/backend/access/transam/xlog.c             |   1 +
 src/backend/access/transam/xloginsert.c       | 161 ++++++++++++++++++++++----
 src/backend/access/transam/xlogreader.c       |  71 +++++++++---
 src/backend/utils/misc/guc.c                  |   9 ++
 src/backend/utils/misc/postgresql.conf.sample |   1 +
 src/include/access/xlog.h                     |   1 +
 src/include/access/xlogreader.h               |   7 +-
 src/include/access/xlogrecord.h               |  49 ++++++--
 src/include/pg_config.h.in                    |   4 +-
 11 files changed, 297 insertions(+), 53 deletions(-)

diff --git a/contrib/pg_xlogdump/pg_xlogdump.c b/contrib/pg_xlogdump/pg_xlogdump.c
index c1bfbc2..3ebaac6 100644
--- a/contrib/pg_xlogdump/pg_xlogdump.c
+++ b/contrib/pg_xlogdump/pg_xlogdump.c
@@ -363,14 +363,14 @@ XLogDumpCountRecord(XLogDumpConfig *config, XLogDumpStats *stats,
 	 * takes up BLCKSZ bytes, minus the "hole" length.
 	 *
 	 * XXX: We peek into xlogreader's private decoded backup blocks for the
-	 * hole_length. It doesn't seem worth it to add an accessor macro for
+	 * length of block. It doesn't seem worth it to add an accessor macro for
 	 * this.
 	 */
 	fpi_len = 0;
 	for (block_id = 0; block_id <= record->max_block_id; block_id++)
 	{
 		if (XLogRecHasBlockImage(record, block_id))
-			fpi_len += BLCKSZ - record->blocks[block_id].hole_length;
+			fpi_len += record->blocks[block_id].bkp_len;
 	}
 
 	/* Update per-rmgr statistics */
@@ -465,9 +465,16 @@ XLogDumpDisplayRecord(XLogDumpConfig *config, XLogReaderState *record)
 				   blk);
 			if (XLogRecHasBlockImage(record, block_id))
 			{
-				printf(" (FPW); hole: offset: %u, length: %u\n",
-					   record->blocks[block_id].hole_offset,
-					   record->blocks[block_id].hole_length);
+				if (record->blocks[block_id].is_compressed)
+					printf(" (FPW compressed); hole offset: %u, "
+						   "compressed length: %u, original length: %u\n",
+						   record->blocks[block_id].hole_offset,
+						   record->blocks[block_id].bkp_len,
+						   record->blocks[block_id].bkp_uncompress_len);
+				else
+					printf(" (FPW); hole offset: %u, length: %u\n",
+						   record->blocks[block_id].hole_offset,
+						   record->blocks[block_id].bkp_len);
 			}
 			putchar('\n');
 		}
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6bcb106..acbbd20 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -2282,6 +2282,35 @@ include_dir 'conf.d'
       </listitem>
      </varlistentry>
 
+     <varlistentry id="guc-wal-compression" xreflabel="wal_compression">
+      <term><varname>wal_compression</varname> (<type>boolean</type>)
+      <indexterm>
+       <primary><varname>wal_compression</> configuration parameter</primary>
+      </indexterm>
+      </term>
+      <listitem>
+       <para>
+        When this parameter is <literal>on</>, the <productname>PostgreSQL</>
+        server compresses the content of full-page writes when necessary and
+        inserts them into WAL as smaller records, reducing the amount of
+        WAL stored on disk.
+       </para>
+
+       <para>
+        Compression has the advantage of reducing the amount of disk I/O when
+        doing WAL-logging, at the cost of some extra CPU to perform the
+        compression of a block image.  At WAL replay, compressed block images
+        need extra CPU cycles to perform the decompression of each block
+        image, but compression can also reduce replay time in I/O-bound
+        environments.
+       </para>
+
+       <para>
+        The default value is <literal>off</>.
+       </para>
+      </listitem>
+     </varlistentry>
+
      <varlistentry id="guc-wal-buffers" xreflabel="wal_buffers">
       <term><varname>wal_buffers</varname> (<type>integer</type>)
       <indexterm>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 629a457..992cfd0 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -88,6 +88,7 @@ char	   *XLogArchiveCommand = NULL;
 bool		EnableHotStandby = false;
 bool		fullPageWrites = true;
 bool		wal_log_hints = false;
+bool		wal_compression = false;
 bool		log_checkpoints = false;
 int			sync_method = DEFAULT_SYNC_METHOD;
 int			wal_level = WAL_LEVEL_MINIMAL;
diff --git a/src/backend/access/transam/xloginsert.c b/src/backend/access/transam/xloginsert.c
index a1e2eb8..64881ab 100644
--- a/src/backend/access/transam/xloginsert.c
+++ b/src/backend/access/transam/xloginsert.c
@@ -24,12 +24,16 @@
 #include "access/xlog_internal.h"
 #include "access/xloginsert.h"
 #include "catalog/pg_control.h"
+#include "common/pg_lzcompress.h"
 #include "miscadmin.h"
 #include "storage/bufmgr.h"
 #include "storage/proc.h"
 #include "utils/memutils.h"
 #include "pg_trace.h"
 
+/* maximum size for compression buffer of block image */
+#define PGLZ_MAX_BLCKSZ	PGLZ_MAX_OUTPUT(BLCKSZ)
+
 /*
  * For each block reference registered with XLogRegisterBuffer, we fill in
  * a registered_buffer struct.
@@ -50,6 +54,8 @@ typedef struct
 
 	XLogRecData bkp_rdatas[2];	/* temporary rdatas used to hold references to
 								 * backup block data in XLogRecordAssemble() */
+	char		compressed_page[PGLZ_MAX_BLCKSZ]; /* recipient for compressed
+												   * page */
 }	registered_buffer;
 
 static registered_buffer *registered_buffers;
@@ -81,6 +87,9 @@ static char *hdr_scratch = NULL;
 	 MaxSizeOfXLogRecordBlockHeader * (XLR_MAX_BLOCK_ID + 1) + \
 	 SizeOfXLogRecordDataHeaderLong)
 
+/* Scratch buffer holding block image data to be compressed  */
+static char *compression_scratch = NULL;
+
 /*
  * An array of XLogRecData structs, to hold registered data.
  */
@@ -97,6 +106,9 @@ static XLogRecData *XLogRecordAssemble(RmgrId rmid, uint8 info,
 				   XLogRecPtr RedoRecPtr, bool doPageWrites,
 				   XLogRecPtr *fpw_lsn);
 
+static bool XLogCompressBackupBlock(char *page, uint32 orig_len,
+									char *dest, uint16 *len);
+
 /*
  * Begin constructing a WAL record. This must be called before the
  * XLogRegister* functions and XLogInsert().
@@ -482,7 +494,9 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 		bool		needs_data;
 		XLogRecordBlockHeader bkpb;
 		XLogRecordBlockImageHeader bimg;
+		XLogRecordBlockImageCompressionInfo cbimg;
 		bool		samerel;
+		bool		is_compressed = false;
 
 		if (!regbuf->in_use)
 			continue;
@@ -529,9 +543,13 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 		if (needs_backup)
 		{
 			Page		page = regbuf->page;
+			uint16		hole_length;
+			uint16		hole_offset;
+			uint16		compress_len = 0;
 
 			/*
-			 * The page needs to be backed up, so set up *bimg
+			 * The page needs to be backed up, so calculate its hole length
+			 * and offset.
 			 */
 			if (regbuf->flags & REGBUF_STANDARD)
 			{
@@ -543,49 +561,107 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 					upper > lower &&
 					upper <= BLCKSZ)
 				{
-					bimg.hole_offset = lower;
-					bimg.hole_length = upper - lower;
+					hole_offset = lower;
+					hole_length = upper - lower;
 				}
 				else
 				{
 					/* No "hole" to compress out */
-					bimg.hole_offset = 0;
-					bimg.hole_length = 0;
+					hole_offset = 0;
+					hole_length = 0;
 				}
 			}
 			else
 			{
 				/* Not a standard page header, don't try to eliminate "hole" */
-				bimg.hole_offset = 0;
-				bimg.hole_length = 0;
+				hole_offset = 0;
+				hole_length = 0;
 			}
 
-			/* Fill in the remaining fields in the XLogRecordBlockData struct */
-			bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
+			/*
+			 * First try to compress block without its hole to improve the
+			 * compression of the whole. If the block is considered as
+			 * not compressible, complete the block header information
+			 * accordingly.
+			 */
+			if (wal_compression)
+			{
+				int page_len = BLCKSZ - hole_length;
+				char *scratch_buf;
 
-			total_len += BLCKSZ - bimg.hole_length;
+				if (hole_length != 0)
+				{
+					scratch_buf = compression_scratch;
+					memcpy(scratch_buf, page, hole_offset);
+					memcpy(scratch_buf + hole_offset,
+						   page + (hole_offset + hole_length),
+						   BLCKSZ - (hole_length + hole_offset));
+				}
+				else
+					scratch_buf = page;
+
+				/* Perform compression of block */
+				if (XLogCompressBackupBlock(scratch_buf,
+											page_len,
+											regbuf->compressed_page,
+											&compress_len))
+				{
+					/* compression is done, add record */
+					is_compressed = true;
+				}
+			}
 
 			/*
 			 * Construct XLogRecData entries for the page content.
 			 */
-			rdt_datas_last->next = &regbuf->bkp_rdatas[0];
-			rdt_datas_last = rdt_datas_last->next;
-			if (bimg.hole_length == 0)
+			bkpb.fork_flags |= BKPBLOCK_HAS_IMAGE;
+
+			/* hole offset length should be 15-bit long */
+			Assert((hole_offset & 0x8000) == 0);
+
+			if (is_compressed)
 			{
-				rdt_datas_last->data = page;
-				rdt_datas_last->len = BLCKSZ;
+				/* compressed block information */
+				bimg.length = compress_len;
+				bimg.hole_offset = hole_offset;
+				bimg.is_compressed = 1;
+				cbimg.raw_length = BLCKSZ - hole_length;
+
+				/* record entry for compressed block */
+				rdt_datas_last->next = &regbuf->bkp_rdatas[0];
+				rdt_datas_last = rdt_datas_last->next;
+				rdt_datas_last->data = regbuf->compressed_page;
+				rdt_datas_last->len = compress_len;
+				total_len += bimg.length;
 			}
 			else
 			{
-				/* must skip the hole */
-				rdt_datas_last->data = page;
-				rdt_datas_last->len = bimg.hole_offset;
-
-				rdt_datas_last->next = &regbuf->bkp_rdatas[1];
+				/* uncompressed block information */
+				bimg.length = BLCKSZ - hole_length;
+				bimg.hole_offset = hole_offset;
+				bimg.is_compressed = 0;
+				total_len += bimg.length;
+
+				/* record entries for uncompressed block */
+				rdt_datas_last->next = &regbuf->bkp_rdatas[0];
 				rdt_datas_last = rdt_datas_last->next;
+				if (hole_length == 0)
+				{
+					rdt_datas_last->data = page;
+					rdt_datas_last->len = BLCKSZ;
+				}
+				else
+				{
+					/* must skip the hole */
+					rdt_datas_last->data = page;
+					rdt_datas_last->len = hole_offset;
+
+					rdt_datas_last->next = &regbuf->bkp_rdatas[1];
+					rdt_datas_last = rdt_datas_last->next;
 
-				rdt_datas_last->data = page + (bimg.hole_offset + bimg.hole_length);
-				rdt_datas_last->len = BLCKSZ - (bimg.hole_offset + bimg.hole_length);
+					rdt_datas_last->data = page + (hole_offset + hole_length);
+					rdt_datas_last->len = BLCKSZ - (hole_offset + hole_length);
+				}
 			}
 		}
 
@@ -619,6 +695,12 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 		{
 			memcpy(scratch, &bimg, SizeOfXLogRecordBlockImageHeader);
 			scratch += SizeOfXLogRecordBlockImageHeader;
+			if (is_compressed)
+			{
+				memcpy(scratch, &cbimg,
+					   SizeOfXLogRecordBlockImageCompressionInfo);
+				scratch += SizeOfXLogRecordBlockImageCompressionInfo;
+			}
 		}
 		if (!samerel)
 		{
@@ -681,6 +763,36 @@ XLogRecordAssemble(RmgrId rmid, uint8 info,
 }
 
 /*
+ * Compress a backup block image with pglz_compress(). Return true, with
+ * 'len' set to the compressed length, only if more than 2 bytes are saved.
+ * Return false if pglz_compress() returns 0 or if the saving is too small.
+ */
+static bool
+XLogCompressBackupBlock(char *page, uint32 orig_len, char *dest, uint16 *len)
+{
+	int32 compressed_len;
+
+	/* run compression */
+	compressed_len = pglz_compress(page, orig_len, dest,
+								   PGLZ_strategy_default);
+
+	/* leave if data cannot be compressed */
+	if (compressed_len == 0)
+		return false;
+
+	/*
+	 * Recheck the actual size even if pglz_compress() reports success: the
+	 * compressed image is only kept if it saves more than the 2 bytes used
+	 * by raw_length in the WAL record header of a compressed block. Note
+	 * that 'len' is written before this check, so it is set even on failure.
+	 */
+	*len = (uint16) compressed_len;
+	if (*len >= orig_len - 2)
+		return false;
+	return true;
+}
+
+/*
  * Determine whether the buffer referenced has to be backed up.
  *
  * Since we don't yet have the insert lock, fullPageWrites and forcePageWrites
@@ -893,4 +1005,9 @@ InitXLogInsert(void)
 	if (hdr_scratch == NULL)
 		hdr_scratch = MemoryContextAllocZero(xloginsert_cxt,
 											 HEADER_SCRATCH_SIZE);
+
+	/* allocate scratch buffer used for compression of block images */
+	if (compression_scratch == NULL)
+		compression_scratch = MemoryContextAllocZero(xloginsert_cxt,
+													 BLCKSZ);
 }
diff --git a/src/backend/access/transam/xlogreader.c b/src/backend/access/transam/xlogreader.c
index 60470b5..a7f5a4f 100644
--- a/src/backend/access/transam/xlogreader.c
+++ b/src/backend/access/transam/xlogreader.c
@@ -20,6 +20,7 @@
 #include "access/xlog_internal.h"
 #include "access/xlogreader.h"
 #include "catalog/pg_control.h"
+#include "common/pg_lzcompress.h"
 
 static bool allocate_recordbuf(XLogReaderState *state, uint32 reclength);
 
@@ -74,13 +75,15 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
 	state->max_block_id = -1;
 
 	/*
-	 * Permanently allocate readBuf.  We do it this way, rather than just
-	 * making a static array, for two reasons: (1) no need to waste the
-	 * storage in most instantiations of the backend; (2) a static char array
-	 * isn't guaranteed to have any particular alignment, whereas palloc()
-	 * will provide MAXALIGN'd storage.
+	 * Permanently allocate readBuf and uncompressBuf.  We do it this way,
+	 * rather than just making a static array, for two reasons:
+	 * (1) no need to waste the  storage in most instantiations of the
+	 * backend; (2) a static char array isn't guaranteed to have any
+	 * particular alignment, whereas palloc() will provide MAXALIGN'd
+	 * storage.
 	 */
 	state->readBuf = (char *) palloc(XLOG_BLCKSZ);
+	state->uncompressBuf = (char *) palloc(BLCKSZ);
 
 	state->read_page = pagereadfunc;
 	/* system_identifier initialized to zeroes above */
@@ -98,6 +101,7 @@ XLogReaderAllocate(XLogPageReadCB pagereadfunc, void *private_data)
 	{
 		pfree(state->errormsg_buf);
 		pfree(state->readBuf);
+		pfree(state->uncompressBuf);
 		pfree(state);
 		return NULL;
 	}
@@ -125,6 +129,7 @@ XLogReaderFree(XLogReaderState *state)
 	if (state->readRecordBuf)
 		pfree(state->readRecordBuf);
 	pfree(state->readBuf);
+	pfree(state->uncompressBuf);
 	pfree(state);
 }
 
@@ -922,6 +927,7 @@ ResetDecoder(XLogReaderState *state)
 		state->blocks[block_id].in_use = false;
 		state->blocks[block_id].has_image = false;
 		state->blocks[block_id].has_data = false;
+		state->blocks[block_id].is_compressed = false;
 	}
 	state->max_block_id = -1;
 }
@@ -1031,9 +1037,16 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
 
 			if (blk->has_image)
 			{
-				COPY_HEADER_FIELD(&blk->hole_offset, sizeof(uint16));
-				COPY_HEADER_FIELD(&blk->hole_length, sizeof(uint16));
-				datatotal += BLCKSZ - blk->hole_length;
+				XLogRecordBlockImageHeader bkp_info;
+				COPY_HEADER_FIELD(&bkp_info, sizeof(XLogRecordBlockImageHeader));
+				blk->is_compressed = bkp_info.is_compressed;
+				blk->bkp_len = bkp_info.length;
+				blk->hole_offset = bkp_info.hole_offset;
+				if (blk->is_compressed)
+				{
+					COPY_HEADER_FIELD(&blk->bkp_uncompress_len, sizeof(uint16));
+				}
+				datatotal += blk->bkp_len;
 			}
 			if (!(fork_flags & BKPBLOCK_SAME_REL))
 			{
@@ -1088,7 +1101,7 @@ DecodeXLogRecord(XLogReaderState *state, XLogRecord *record, char **errormsg)
 		if (blk->has_image)
 		{
 			blk->bkp_image = ptr;
-			ptr += BLCKSZ - blk->hole_length;
+			ptr += blk->bkp_len;
 		}
 		if (blk->has_data)
 		{
@@ -1194,6 +1207,8 @@ bool
 RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 {
 	DecodedBkpBlock *bkpb;
+	char   *block_image;
+	int		hole_length;
 
 	if (!record->blocks[block_id].in_use)
 		return false;
@@ -1201,19 +1216,43 @@ RestoreBlockImage(XLogReaderState *record, uint8 block_id, char *page)
 		return false;
 
 	bkpb = &record->blocks[block_id];
+	block_image = bkpb->bkp_image;
+
+	/*
+	 * Fetch page data, with different processing depending on if the
+	 * page is compressed or not.
+	 */
+	if (bkpb->is_compressed)
+	{
+		if (pglz_decompress(block_image, record->uncompressBuf,
+							bkpb->bkp_len, bkpb->bkp_uncompress_len) == 0)
+		{
+			report_invalid_record(record, "invalid compressed image at %X/%X, block %d",
+								  (uint32) (record->ReadRecPtr >> 32),
+								  (uint32) record->ReadRecPtr,
+								  block_id);
+			return false;
+		}
+
+		block_image = record->uncompressBuf;
+		hole_length = BLCKSZ - bkpb->bkp_uncompress_len;
+	}
+	else
+		hole_length = BLCKSZ - bkpb->bkp_len;
 
-	if (bkpb->hole_length == 0)
+	/* generate page, taking into account hole if necessary */
+	if (hole_length == 0)
 	{
-		memcpy(page, bkpb->bkp_image, BLCKSZ);
+		memcpy(page, block_image, BLCKSZ);
 	}
 	else
 	{
-		memcpy(page, bkpb->bkp_image, bkpb->hole_offset);
+		memcpy(page, block_image, bkpb->hole_offset);
 		/* must zero-fill the hole */
-		MemSet(page + bkpb->hole_offset, 0, bkpb->hole_length);
-		memcpy(page + (bkpb->hole_offset + bkpb->hole_length),
-			   bkpb->bkp_image + bkpb->hole_offset,
-			   BLCKSZ - (bkpb->hole_offset + bkpb->hole_length));
+		MemSet(page + bkpb->hole_offset, 0, hole_length);
+		memcpy(page + (bkpb->hole_offset + hole_length),
+			   block_image + bkpb->hole_offset,
+			   BLCKSZ - (bkpb->hole_offset + hole_length));
 	}
 
 	return true;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 9572777..302a9c5 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -919,6 +919,15 @@ static struct config_bool ConfigureNamesBool[] =
 		false,
 		NULL, NULL, NULL
 	},
+	{
+		{"wal_compression", PGC_USERSET, WAL_SETTINGS,
+			 gettext_noop("Compresses full-page writes written in WAL file."),
+			 NULL
+		},
+		&wal_compression,
+		false,
+		NULL, NULL, NULL
+	},
 
 	{
 		{"log_checkpoints", PGC_SIGHUP, LOGGING_WHAT,
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index b053659..b367e2c 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -186,6 +186,7 @@
 					#   fsync_writethrough
 					#   open_sync
 #full_page_writes = on			# recover from partial page writes
+#wal_compression = off			# enable compression of full-page writes
 #wal_log_hints = off			# also do full page writes of non-critical updates
 					# (change requires restart)
 #wal_buffers = -1			# min 32kB, -1 sets based on shared_buffers
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 138deaf..86ed05c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -98,6 +98,7 @@ extern char *XLogArchiveCommand;
 extern bool EnableHotStandby;
 extern bool fullPageWrites;
 extern bool wal_log_hints;
+extern bool wal_compression;
 extern bool log_checkpoints;
 
 /* WAL levels */
diff --git a/src/include/access/xlogreader.h b/src/include/access/xlogreader.h
index 74bec20..6e8005e 100644
--- a/src/include/access/xlogreader.h
+++ b/src/include/access/xlogreader.h
@@ -52,9 +52,11 @@ typedef struct
 
 	/* Information on full-page image, if any */
 	bool		has_image;
+	bool		is_compressed;
 	char	   *bkp_image;
+	uint16		bkp_len;
+	uint16		bkp_uncompress_len;
 	uint16		hole_offset;
-	uint16		hole_length;
 
 	/* Buffer holding the rmgr-specific data associated with this block */
 	bool		has_data;
@@ -138,6 +140,9 @@ struct XLogReaderState
 	/* Buffer for currently read page (XLOG_BLCKSZ bytes) */
 	char	   *readBuf;
 
+	/* Scratch buffer used for uncompressed pages */
+	char	   *uncompressBuf;
+
 	/* last read segment, segment offset, read length, TLI */
 	XLogSegNo	readSegNo;
 	uint32		readOff;
diff --git a/src/include/access/xlogrecord.h b/src/include/access/xlogrecord.h
index ff77db8..a805182 100644
--- a/src/include/access/xlogrecord.h
+++ b/src/include/access/xlogrecord.h
@@ -98,28 +98,61 @@ typedef struct XLogRecordBlockHeader
  * Additional header information when a full-page image is included
  * (i.e. when BKPBLOCK_HAS_IMAGE is set).
  *
- * As a trivial form of data compression, the XLOG code is aware that
- * PG data pages usually contain an unused "hole" in the middle, which
- * contains only zero bytes.  If hole_length > 0 then we have removed
- * such a "hole" from the stored data (and it's not counted in the
- * XLOG record's CRC, either).  Hence, the amount of block data actually
- * present is BLCKSZ - hole_length bytes.
+ * Block images are able to do several types of compression:
+ * - When wal_compression is off, as a trivial form of compression, the
+ * XLOG code is aware that PG data pages usually contain an unused "hole"
+ * in the middle, which contains only zero bytes.  If length < BLCKSZ
+ * then we have removed such a "hole" from the stored data (and it is
+ * not counted in the XLOG record's CRC, either).  Hence, the amount
+ * of block data actually present is "length" bytes.  The hole "offset"
+ * on page is defined using "hole_offset".
+ * - When wal_compression is on, block images are compressed using a
+ * compression algorithm without their hole to improve compression
+ * process of the page. "length" corresponds in this case to the length
+ * of the block compressed. "hole_offset" is the hole offset of the page,
+ * and the length of the uncompressed block is defined by "raw_length",
+ * whose data is included in the record only when compression is enabled.
+ *
+ * "is_compressed" is used to identify if a given block image is compressed
+ * or not. Maximum page size allowed on the system being 32k, the hole
+ * offset cannot be more than 15-bit long so the last free bit is used to
+ * store the compression state of block image. If the maximum page size
+ * allowed is increased to a value higher than that, we should consider
+ * increasing this structure size as well, but this would increase the
+ * length of block header in WAL records with alignment.
  */
 typedef struct XLogRecordBlockImageHeader
 {
-	uint16		hole_offset;	/* number of bytes before "hole" */
-	uint16		hole_length;	/* number of bytes in "hole" */
+	uint16	length;			/* length of block image data stored in
+							 * record (compressed length if compressed) */
+	uint16	hole_offset:15,	/* number of bytes before "hole" */
+		is_compressed:1;	/* compression status of image */
+
+	/* Followed by the data related to compression if block is compressed */
 } XLogRecordBlockImageHeader;
 
 #define SizeOfXLogRecordBlockImageHeader sizeof(XLogRecordBlockImageHeader)
 
 /*
+ * Extra header information stored only when a block image is compressed,
+ * as flagged by is_compressed in XLogRecordBlockImageHeader.
+ */
+typedef struct XLogRecordBlockImageCompressionInfo
+{
+	uint16	raw_length;		/* block length before compression, w/o hole */
+} XLogRecordBlockImageCompressionInfo;
+
+#define SizeOfXLogRecordBlockImageCompressionInfo \
+	sizeof(XLogRecordBlockImageCompressionInfo)
+
+/*
  * Maximum size of the header for a block reference. This is used to size a
  * temporary buffer for constructing the header.
  */
 #define MaxSizeOfXLogRecordBlockHeader \
 	(SizeOfXLogRecordBlockHeader + \
 	 SizeOfXLogRecordBlockImageHeader + \
+	 SizeOfXLogRecordBlockImageCompressionInfo + \
 	 sizeof(RelFileNode) + \
 	 sizeof(BlockNumber))
 
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index 995fb65..7616719 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -35,7 +35,9 @@
    to have large tuples, since fields can be spread across multiple tuples).
    BLCKSZ must be a power of 2. The maximum possible value of BLCKSZ is
    currently 2^15 (32768). This is determined by the 15-bit widths of the
-   lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h).
+   lp_off and lp_len fields in ItemIdData (see include/storage/itemid.h) and
+   XLogRecordBlockImageHeader where page hole offset is limited to 15-bit
+   length (see src/include/access/xlogrecord.h).
    Changing BLCKSZ requires an initdb. */
 #undef BLCKSZ
 
-- 
2.2.2

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to