The log_newpage function, used to WAL-log a full copy of a page, is missing the trick we normally use for full-page images: leaving out the unused space in the middle of the block. That's pretty trivial to implement, so we should.
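
For anyone who hasn't looked at that trick before: on a page that follows
the standard layout, everything between pd_lower and pd_upper is unused,
so a full-page image only needs the bytes on either side of that "hole".
A minimal standalone sketch of the computation (not the patch itself;
SketchPageHeader and compute_hole are made-up names, and only the two
relevant header fields are shown):

#include <stdint.h>

#define BLCKSZ 8192				/* assuming the default block size */

typedef struct SketchPageHeader
{
	/* LSN, checksum, flags and the rest of the real header omitted */
	uint16_t	pd_lower;		/* offset to start of free space */
	uint16_t	pd_upper;		/* offset to end of free space */
} SketchPageHeader;

static void
compute_hole(const SketchPageHeader *ph,
			 uint16_t *hole_offset, uint16_t *hole_length)
{
	/* Same sanity checks as the patch: a bogus header means "no hole" */
	if (ph->pd_lower >= sizeof(SketchPageHeader) &&
		ph->pd_upper > ph->pd_lower &&
		ph->pd_upper <= BLCKSZ)
	{
		*hole_offset = ph->pd_lower;
		*hole_length = ph->pd_upper - ph->pd_lower;
	}
	else
	{
		*hole_offset = 0;
		*hole_length = 0;
	}
}

The WAL record then carries only the bytes before pd_lower and the bytes
from pd_upper to the end of the block, and redo zero-fills the gap, just
like the existing backup-block code does.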

The place where this matters the most is when building a new B-tree index. When wal_level > minimal, all pages in the created index are logged with log_newpage, and by default we leave 10% free space on index pages. So implementing this reduces the amount of WAL generated by index creation by roughly 10%.
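
(Back-of-the-envelope: the default fillfactor for B-tree leaf pages is 90,
so on a freshly built leaf page the gap between pd_lower and pd_upper is
about 10% of the block, i.e. roughly 800 out of 8192 bytes skipped per
NEWPAGE record at the default block size.)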

Anyone see a problem with this?

- Heikki
*** a/src/backend/access/gin/gininsert.c
--- b/src/backend/access/gin/gininsert.c
***************
*** 435,444 **** ginbuildempty(PG_FUNCTION_ARGS)
  	START_CRIT_SECTION();
  	GinInitMetabuffer(MetaBuffer);
  	MarkBufferDirty(MetaBuffer);
! 	log_newpage_buffer(MetaBuffer);
  	GinInitBuffer(RootBuffer, GIN_LEAF);
  	MarkBufferDirty(RootBuffer);
! 	log_newpage_buffer(RootBuffer);
  	END_CRIT_SECTION();
  
  	/* Unlock and release the buffers. */
--- 435,444 ----
  	START_CRIT_SECTION();
  	GinInitMetabuffer(MetaBuffer);
  	MarkBufferDirty(MetaBuffer);
! 	log_newpage_buffer(MetaBuffer, false);
  	GinInitBuffer(RootBuffer, GIN_LEAF);
  	MarkBufferDirty(RootBuffer);
! 	log_newpage_buffer(RootBuffer, false);
  	END_CRIT_SECTION();
  
  	/* Unlock and release the buffers. */
*** a/src/backend/access/gist/gist.c
--- b/src/backend/access/gist/gist.c
***************
*** 83,89 **** gistbuildempty(PG_FUNCTION_ARGS)
  	START_CRIT_SECTION();
  	GISTInitBuffer(buffer, F_LEAF);
  	MarkBufferDirty(buffer);
! 	log_newpage_buffer(buffer);
  	END_CRIT_SECTION();
  
  	/* Unlock and release the buffer */
--- 83,89 ----
  	START_CRIT_SECTION();
  	GISTInitBuffer(buffer, F_LEAF);
  	MarkBufferDirty(buffer);
! 	log_newpage_buffer(buffer, true);
  	END_CRIT_SECTION();
  
  	/* Unlock and release the buffer */
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 6207,6222 **** log_heap_update(Relation reln, Buffer oldbuf,
   * memory and writing them directly to smgr.  If you're using buffers, call
   * log_newpage_buffer instead.
   *
!  * Note: the NEWPAGE log record is used for both heaps and indexes, so do
!  * not do anything that assumes we are touching a heap.
   */
  XLogRecPtr
  log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
! 			Page page)
  {
  	xl_heap_newpage xlrec;
  	XLogRecPtr	recptr;
! 	XLogRecData rdata[2];
  
  	/* NO ELOG(ERROR) from here till newpage op is logged */
  	START_CRIT_SECTION();
--- 6207,6228 ----
   * memory and writing them directly to smgr.  If you're using buffers, call
   * log_newpage_buffer instead.
   *
!  * If the page follows the standard page layout, with a PageHeader and unused
!  * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
!  * the unused space to be left out of the WAL record, making it smaller.
   */
  XLogRecPtr
  log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
! 			Page page, bool page_std)
  {
  	xl_heap_newpage xlrec;
  	XLogRecPtr	recptr;
! 	XLogRecData rdata[3];
! 
! 	/*
! 	 * Note: the NEWPAGE log record is used for both heaps and indexes, so do
! 	 * not do anything that assumes we are touching a heap.
! 	 */
  
  	/* NO ELOG(ERROR) from here till newpage op is logged */
  	START_CRIT_SECTION();
***************
*** 6225,6239 **** log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
  	xlrec.forknum = forkNum;
  	xlrec.blkno = blkno;
  
  	rdata[0].data = (char *) &xlrec;
  	rdata[0].len = SizeOfHeapNewpage;
  	rdata[0].buffer = InvalidBuffer;
  	rdata[0].next = &(rdata[1]);
  
! 	rdata[1].data = (char *) page;
! 	rdata[1].len = BLCKSZ;
! 	rdata[1].buffer = InvalidBuffer;
! 	rdata[1].next = NULL;
  
  	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
  
--- 6231,6288 ----
  	xlrec.forknum = forkNum;
  	xlrec.blkno = blkno;
  
+ 	if (page_std)
+ 	{
+ 		/* Assume we can omit data between pd_lower and pd_upper */
+ 		uint16		lower = ((PageHeader) page)->pd_lower;
+ 		uint16		upper = ((PageHeader) page)->pd_upper;
+ 
+ 		if (lower >= SizeOfPageHeaderData &&
+ 			upper > lower &&
+ 			upper <= BLCKSZ)
+ 		{
+ 			xlrec.hole_offset = lower;
+ 			xlrec.hole_length = upper - lower;
+ 		}
+ 		else
+ 		{
+ 			/* No "hole" to compress out */
+ 			xlrec.hole_offset = 0;
+ 			xlrec.hole_length = 0;
+ 		}
+ 	}
+ 	else
+ 	{
+ 		/* Not a standard page header, don't try to eliminate "hole" */
+ 		xlrec.hole_offset = 0;
+ 		xlrec.hole_length = 0;
+ 	}
+ 
  	rdata[0].data = (char *) &xlrec;
  	rdata[0].len = SizeOfHeapNewpage;
  	rdata[0].buffer = InvalidBuffer;
  	rdata[0].next = &(rdata[1]);
  
! 	if (xlrec.hole_length == 0)
! 	{
! 		rdata[1].data = (char *) page;
! 		rdata[1].len = BLCKSZ;
! 		rdata[1].buffer = InvalidBuffer;
! 		rdata[1].next = NULL;
! 	}
! 	else
! 	{
! 		/* must skip the hole */
! 		rdata[1].data = (char *) page;
! 		rdata[1].len = xlrec.hole_offset;
! 		rdata[1].buffer = InvalidBuffer;
! 		rdata[1].next = &rdata[2];
! 
! 		rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
! 		rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
! 		rdata[2].buffer = InvalidBuffer;
! 		rdata[2].next = NULL;
! 	}
  
  	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
  
***************
*** 6257,6300 **** log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
   * Caller should initialize the buffer and mark it dirty before calling this
   * function.  This function will set the page LSN and TLI.
   *
!  * Note: the NEWPAGE log record is used for both heaps and indexes, so do
!  * not do anything that assumes we are touching a heap.
   */
  XLogRecPtr
! log_newpage_buffer(Buffer buffer)
  {
- 	xl_heap_newpage xlrec;
- 	XLogRecPtr	recptr;
- 	XLogRecData rdata[2];
  	Page		page = BufferGetPage(buffer);
  
! 	/* We should be in a critical section. */
  	Assert(CritSectionCount > 0);
  
! 	BufferGetTag(buffer, &xlrec.node, &xlrec.forknum, &xlrec.blkno);
! 
! 	rdata[0].data = (char *) &xlrec;
! 	rdata[0].len = SizeOfHeapNewpage;
! 	rdata[0].buffer = InvalidBuffer;
! 	rdata[0].next = &(rdata[1]);
! 
! 	rdata[1].data = page;
! 	rdata[1].len = BLCKSZ;
! 	rdata[1].buffer = InvalidBuffer;
! 	rdata[1].next = NULL;
! 
! 	recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
! 
! 	/*
! 	 * The page may be uninitialized. If so, we can't set the LSN and TLI
! 	 * because that would corrupt the page.
! 	 */
! 	if (!PageIsNew(page))
! 	{
! 		PageSetLSN(page, recptr);
! 	}
  
! 	return recptr;
  }
  
  /*
--- 6306,6329 ----
   * Caller should initialize the buffer and mark it dirty before calling this
   * function.  This function will set the page LSN and TLI.
   *
!  * If the page follows the standard page layout, with a PageHeader and unused
!  * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
!  * the unused space to be left out of the WAL record, making it smaller.
   */
  XLogRecPtr
! log_newpage_buffer(Buffer buffer, bool page_std)
  {
  	Page		page = BufferGetPage(buffer);
+ 	RelFileNode rnode;
+ 	ForkNumber forkNum;
+ 	BlockNumber blkno;
  
! 	/* Shared buffers should be modified in a critical section. */
  	Assert(CritSectionCount > 0);
  
! 	BufferGetTag(buffer, &rnode, &forkNum, &blkno);
  
! 	return log_newpage(&rnode, forkNum, blkno, page, page_std);
  }
  
  /*
***************
*** 6582,6593 **** static void
--- 6611,6625 ----
  heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
  {
  	xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
+ 	char	   *blk = ((char *) xlrec) + SizeOfHeapNewpage;
  	Buffer		buffer;
  	Page		page;
  
  	/* Backup blocks are not used in newpage records */
  	Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
  
+ 	Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ - xlrec->hole_length);
+ 
  	/*
  	 * Note: the NEWPAGE log record is used for both heaps and indexes, so do
  	 * not do anything that assumes we are touching a heap.
***************
*** 6598,6605 **** heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
  	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  	page = (Page) BufferGetPage(buffer);
  
! 	Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);
! 	memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
  
  	/*
  	 * The page may be uninitialized. If so, we can't set the LSN because that
--- 6630,6648 ----
  	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
  	page = (Page) BufferGetPage(buffer);
  
! 	if (xlrec->hole_length == 0)
! 	{
! 		memcpy((char *) page, blk, BLCKSZ);
! 	}
! 	else
! 	{
! 		memcpy((char *) page, blk, xlrec->hole_offset);
! 		/* must zero-fill the hole */
! 		MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
! 		memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
! 			   blk + xlrec->hole_offset,
! 			   BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
! 	}
  
  	/*
  	 * The page may be uninitialized. If so, we can't set the LSN because that
*** a/src/backend/access/heap/rewriteheap.c
--- b/src/backend/access/heap/rewriteheap.c
***************
*** 277,283 **** end_heap_rewrite(RewriteState state)
  			log_newpage(&state->rs_new_rel->rd_node,
  						MAIN_FORKNUM,
  						state->rs_blockno,
! 						state->rs_buffer);
  		RelationOpenSmgr(state->rs_new_rel);
  
  		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
--- 277,284 ----
  			log_newpage(&state->rs_new_rel->rd_node,
  						MAIN_FORKNUM,
  						state->rs_blockno,
! 						state->rs_buffer,
! 						true);
  		RelationOpenSmgr(state->rs_new_rel);
  
  		PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
***************
*** 622,628 **** raw_heap_insert(RewriteState state, HeapTuple tup)
  				log_newpage(&state->rs_new_rel->rd_node,
  							MAIN_FORKNUM,
  							state->rs_blockno,
! 							page);
  
  			/*
  			 * Now write the page. We say isTemp = true even if it's not a
--- 623,630 ----
  				log_newpage(&state->rs_new_rel->rd_node,
  							MAIN_FORKNUM,
  							state->rs_blockno,
! 							page,
! 							true);
  
  			/*
  			 * Now write the page. We say isTemp = true even if it's not a
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 222,228 **** btbuildempty(PG_FUNCTION_ARGS)
  			  (char *) metapage, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					BTREE_METAPAGE, metapage);
  
  	/*
  	 * An immediate sync is required even if we xlog'd the page, because the
--- 222,228 ----
  			  (char *) metapage, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					BTREE_METAPAGE, metapage, false);
  
  	/*
  	 * An immediate sync is required even if we xlog'd the page, because the
*** a/src/backend/access/nbtree/nbtsort.c
--- b/src/backend/access/nbtree/nbtsort.c
***************
*** 274,280 **** _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
  	if (wstate->btws_use_wal)
  	{
  		/* We use the heap NEWPAGE record type for this */
! 		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
  	}
  
  	/*
--- 274,280 ----
  	if (wstate->btws_use_wal)
  	{
  		/* We use the heap NEWPAGE record type for this */
! 		log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
  	}
  
  	/*
*** a/src/backend/access/spgist/spginsert.c
--- b/src/backend/access/spgist/spginsert.c
***************
*** 169,175 **** spgbuildempty(PG_FUNCTION_ARGS)
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_METAPAGE_BLKNO, page);
  
  	/* Likewise for the root page. */
  	SpGistInitPage(page, SPGIST_LEAF);
--- 169,175 ----
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_METAPAGE_BLKNO, page, false);
  
  	/* Likewise for the root page. */
  	SpGistInitPage(page, SPGIST_LEAF);
***************
*** 179,185 **** spgbuildempty(PG_FUNCTION_ARGS)
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_ROOT_BLKNO, page);
  
  	/* Likewise for the null-tuples root page. */
  	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
--- 179,185 ----
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_ROOT_BLKNO, page, false);
  
  	/* Likewise for the null-tuples root page. */
  	SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
***************
*** 189,195 **** spgbuildempty(PG_FUNCTION_ARGS)
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_NULL_BLKNO, page);
  
  	/*
  	 * An immediate sync is required even if we xlog'd the pages, because the
--- 189,195 ----
  			  (char *) page, true);
  	if (XLogIsNeeded())
  		log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! 					SPGIST_NULL_BLKNO, page, false);
  
  	/*
  	 * An immediate sync is required even if we xlog'd the pages, because the
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 9132,9138 **** copy_relation_data(SMgrRelation src, SMgrRelation dst,
  
  		/* XLOG stuff */
  		if (use_wal)
! 			log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page);
  
  		PageSetChecksumInplace(page, blkno);
  
--- 9132,9138 ----
  
  		/* XLOG stuff */
  		if (use_wal)
! 			log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
  
  		PageSetChecksumInplace(page, blkno);
  
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 706,712 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
  				 */
  				if (RelationNeedsWAL(onerel) &&
  					PageGetLSN(page) == InvalidXLogRecPtr)
! 					log_newpage_buffer(buf);
  
  				PageSetAllVisible(page);
  				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
--- 706,712 ----
  				 */
  				if (RelationNeedsWAL(onerel) &&
  					PageGetLSN(page) == InvalidXLogRecPtr)
! 					log_newpage_buffer(buf, false);
  
  				PageSetAllVisible(page);
  				visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
*** a/src/include/access/heapam_xlog.h
--- b/src/include/access/heapam_xlog.h
***************
*** 198,207 **** typedef struct xl_heap_newpage
  	RelFileNode node;
  	ForkNumber	forknum;
  	BlockNumber blkno;			/* location of new page */
! 	/* entire page contents follow at end of record */
  } xl_heap_newpage;
  
! #define SizeOfHeapNewpage	(offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber))
  
  /* flags for infobits_set */
  #define XLHL_XMAX_IS_MULTI		0x01
--- 198,209 ----
  	RelFileNode node;
  	ForkNumber	forknum;
  	BlockNumber blkno;			/* location of new page */
! 	uint16		hole_offset;	/* number of bytes before "hole" */
! 	uint16		hole_length;	/* number of bytes in "hole" */
! 	/* entire page contents (minus the hole) follow at end of record */
  } xl_heap_newpage;
  
! #define SizeOfHeapNewpage	(offsetof(xl_heap_newpage, hole_length) + sizeof(uint16))
  
  /* flags for infobits_set */
  #define XLHL_XMAX_IS_MULTI		0x01
***************
*** 282,288 **** extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
  extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
  				 Buffer vm_buffer, TransactionId cutoff_xid);
  extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
! 			BlockNumber blk, Page page);
! extern XLogRecPtr log_newpage_buffer(Buffer buffer);
  
  #endif   /* HEAPAM_XLOG_H */
--- 284,290 ----
  extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
  				 Buffer vm_buffer, TransactionId cutoff_xid);
  extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
! 			BlockNumber blk, Page page, bool page_std);
! extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
  
  #endif   /* HEAPAM_XLOG_H */