The log_newpage function, used to WAL-log a full copy of a page, is
missing the trick we normally use for full-page images to leave out the
unused space on the block. That's pretty trivial to implement, so we should.
The place where this matters the most is when building a new B-tree
index. When wal_level > minimal, all pages in the created index are
logged with log_newpage, and by default we leave 10% free space on index
pages. So implementing this reduces the amount of WAL generated by index
creation by roughly 10%.
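
To illustrate what the trick boils down to, here is a small standalone sketch
of the record assembly and the redo-side reconstruction. The MockPageHeader
struct and the pd_lower/pd_upper values are made-up stand-ins for the real
PageHeaderData and a fillfactor-90 leaf page; the actual changes are in the
patch below.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define BLCKSZ 8192

/* simplified stand-in for the real PageHeaderData (also 24 bytes) */
typedef struct
{
	char		pd_other[12];	/* lsn, checksum, flags in the real header */
	uint16_t	pd_lower;		/* end of the line pointer array */
	uint16_t	pd_upper;		/* start of tuple data */
	char		pd_rest[8];		/* special ptr, pagesize/version, prune xid */
} MockPageHeader;

int
main(void)
{
	char		page[BLCKSZ];
	char		payload[BLCKSZ];	/* what would go into the WAL record */
	char		restored[BLCKSZ];	/* what redo would reconstruct */
	MockPageHeader *hdr = (MockPageHeader *) page;
	uint16_t	hole_offset;
	uint16_t	hole_length;

	/*
	 * Fake a leaf page built with the default fillfactor of 90: about 10%
	 * of the block is unused between pd_lower and pd_upper.  The hole is
	 * left zeroed here, matching what redo reconstructs.
	 */
	memset(page, 0, BLCKSZ);
	hdr->pd_lower = 900;
	hdr->pd_upper = 1719;
	memset(page + sizeof(MockPageHeader), 'L',
		   hdr->pd_lower - sizeof(MockPageHeader));
	memset(page + hdr->pd_upper, 'T', BLCKSZ - hdr->pd_upper);

	/* same sanity checks as in the patched log_newpage */
	if (hdr->pd_lower >= sizeof(MockPageHeader) &&
		hdr->pd_upper > hdr->pd_lower &&
		hdr->pd_upper <= BLCKSZ)
	{
		hole_offset = hdr->pd_lower;
		hole_length = (uint16_t) (hdr->pd_upper - hdr->pd_lower);
	}
	else
	{
		/* not a standard-looking page, log all of it */
		hole_offset = 0;
		hole_length = 0;
	}

	/* "insert" side: copy everything except the hole into the record */
	memcpy(payload, page, hole_offset);
	memcpy(payload + hole_offset,
		   page + hole_offset + hole_length,
		   BLCKSZ - (hole_offset + hole_length));

	/* "redo" side: copy the two pieces back and zero-fill the hole */
	memcpy(restored, payload, hole_offset);
	memset(restored + hole_offset, 0, hole_length);
	memcpy(restored + hole_offset + hole_length,
		   payload + hole_offset,
		   BLCKSZ - (hole_offset + hole_length));

	assert(memcmp(page, restored, BLCKSZ) == 0);
	printf("logged %d of %d bytes (hole at %d, length %d)\n",
		   (int) (BLCKSZ - hole_length), BLCKSZ,
		   (int) hole_offset, (int) hole_length);
	return 0;
}

With those numbers the payload shrinks from 8192 to 7373 bytes, i.e. roughly
the 10% saving mentioned above.
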
Anyone see a problem with this?
- Heikki
*** a/src/backend/access/gin/gininsert.c
--- b/src/backend/access/gin/gininsert.c
***************
*** 435,444 **** ginbuildempty(PG_FUNCTION_ARGS)
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
! log_newpage_buffer(MetaBuffer);
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
! log_newpage_buffer(RootBuffer);
END_CRIT_SECTION();
/* Unlock and release the buffers. */
--- 435,444 ----
START_CRIT_SECTION();
GinInitMetabuffer(MetaBuffer);
MarkBufferDirty(MetaBuffer);
! log_newpage_buffer(MetaBuffer, false);
GinInitBuffer(RootBuffer, GIN_LEAF);
MarkBufferDirty(RootBuffer);
! log_newpage_buffer(RootBuffer, false);
END_CRIT_SECTION();
/* Unlock and release the buffers. */
*** a/src/backend/access/gist/gist.c
--- b/src/backend/access/gist/gist.c
***************
*** 83,89 **** gistbuildempty(PG_FUNCTION_ARGS)
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
! log_newpage_buffer(buffer);
END_CRIT_SECTION();
/* Unlock and release the buffer */
--- 83,89 ----
START_CRIT_SECTION();
GISTInitBuffer(buffer, F_LEAF);
MarkBufferDirty(buffer);
! log_newpage_buffer(buffer, true);
END_CRIT_SECTION();
/* Unlock and release the buffer */
*** a/src/backend/access/heap/heapam.c
--- b/src/backend/access/heap/heapam.c
***************
*** 6207,6222 **** log_heap_update(Relation reln, Buffer oldbuf,
* memory and writing them directly to smgr. If you're using buffers, call
* log_newpage_buffer instead.
*
! * Note: the NEWPAGE log record is used for both heaps and indexes, so do
! * not do anything that assumes we are touching a heap.
*/
XLogRecPtr
log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
! Page page)
{
xl_heap_newpage xlrec;
XLogRecPtr recptr;
! XLogRecData rdata[2];
/* NO ELOG(ERROR) from here till newpage op is logged */
START_CRIT_SECTION();
--- 6207,6228 ----
* memory and writing them directly to smgr. If you're using buffers, call
* log_newpage_buffer instead.
*
! * If the page follows the standard page layout, with a PageHeader and unused
! * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
! * the unused space to be left out from the WAL record, making it smaller.
*/
XLogRecPtr
log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
! Page page, bool page_std)
{
xl_heap_newpage xlrec;
XLogRecPtr recptr;
! XLogRecData rdata[3];
!
! /*
! * Note: the NEWPAGE log record is used for both heaps and indexes, so do
! * not do anything that assumes we are touching a heap.
! */
/* NO ELOG(ERROR) from here till newpage op is logged */
START_CRIT_SECTION();
***************
*** 6225,6239 **** log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
xlrec.forknum = forkNum;
xlrec.blkno = blkno;
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapNewpage;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
! rdata[1].data = (char *) page;
! rdata[1].len = BLCKSZ;
! rdata[1].buffer = InvalidBuffer;
! rdata[1].next = NULL;
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
--- 6231,6288 ----
xlrec.forknum = forkNum;
xlrec.blkno = blkno;
+ if (page_std)
+ {
+ /* Assume we can omit data between pd_lower and pd_upper */
+ uint16 lower = ((PageHeader) page)->pd_lower;
+ uint16 upper = ((PageHeader) page)->pd_upper;
+
+ if (lower >= SizeOfPageHeaderData &&
+ upper > lower &&
+ upper <= BLCKSZ)
+ {
+ xlrec.hole_offset = lower;
+ xlrec.hole_length = upper - lower;
+ }
+ else
+ {
+ /* No "hole" to compress out */
+ xlrec.hole_offset = 0;
+ xlrec.hole_length = 0;
+ }
+ }
+ else
+ {
+ /* Not a standard page header, don't try to eliminate "hole" */
+ xlrec.hole_offset = 0;
+ xlrec.hole_length = 0;
+ }
+
rdata[0].data = (char *) &xlrec;
rdata[0].len = SizeOfHeapNewpage;
rdata[0].buffer = InvalidBuffer;
rdata[0].next = &(rdata[1]);
! if (xlrec.hole_length == 0)
! {
! rdata[1].data = (char *) page;
! rdata[1].len = BLCKSZ;
! rdata[1].buffer = InvalidBuffer;
! rdata[1].next = NULL;
! }
! else
! {
! /* must skip the hole */
! rdata[1].data = (char *) page;
! rdata[1].len = xlrec.hole_offset;
! rdata[1].buffer = InvalidBuffer;
! rdata[1].next = &rdata[2];
!
! rdata[2].data = (char *) page + (xlrec.hole_offset + xlrec.hole_length);
! rdata[2].len = BLCKSZ - (xlrec.hole_offset + xlrec.hole_length);
! rdata[2].buffer = InvalidBuffer;
! rdata[2].next = NULL;
! }
recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
***************
*** 6257,6300 **** log_newpage(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
* Caller should initialize the buffer and mark it dirty before calling this
* function. This function will set the page LSN and TLI.
*
! * Note: the NEWPAGE log record is used for both heaps and indexes, so do
! * not do anything that assumes we are touching a heap.
*/
XLogRecPtr
! log_newpage_buffer(Buffer buffer)
{
- xl_heap_newpage xlrec;
- XLogRecPtr recptr;
- XLogRecData rdata[2];
Page page = BufferGetPage(buffer);
! /* We should be in a critical section. */
Assert(CritSectionCount > 0);
! BufferGetTag(buffer, &xlrec.node, &xlrec.forknum, &xlrec.blkno);
!
! rdata[0].data = (char *) &xlrec;
! rdata[0].len = SizeOfHeapNewpage;
! rdata[0].buffer = InvalidBuffer;
! rdata[0].next = &(rdata[1]);
!
! rdata[1].data = page;
! rdata[1].len = BLCKSZ;
! rdata[1].buffer = InvalidBuffer;
! rdata[1].next = NULL;
!
! recptr = XLogInsert(RM_HEAP_ID, XLOG_HEAP_NEWPAGE, rdata);
!
! /*
! * The page may be uninitialized. If so, we can't set the LSN and TLI
! * because that would corrupt the page.
! */
! if (!PageIsNew(page))
! {
! PageSetLSN(page, recptr);
! }
! return recptr;
}
/*
--- 6306,6329 ----
* Caller should initialize the buffer and mark it dirty before calling this
* function. This function will set the page LSN and TLI.
*
! * If the page follows the standard page layout, with a PageHeader and unused
! * space between pd_lower and pd_upper, set 'page_std' to TRUE. That allows
! * the unused space to be left out from the WAL record, making it smaller.
*/
XLogRecPtr
! log_newpage_buffer(Buffer buffer, bool page_std)
{
Page page = BufferGetPage(buffer);
+ RelFileNode rnode;
+ ForkNumber forkNum;
+ BlockNumber blkno;
! /* Shared buffers should be modified in a critical section. */
Assert(CritSectionCount > 0);
! BufferGetTag(buffer, &rnode, &forkNum, &blkno);
! return log_newpage(&rnode, forkNum, blkno, page, page_std);
}
/*
***************
*** 6582,6593 **** static void
--- 6611,6625 ----
heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
{
xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
+ char *blk = ((char *) xlrec) + sizeof(xl_heap_newpage);
Buffer buffer;
Page page;
/* Backup blocks are not used in newpage records */
Assert(!(record->xl_info & XLR_BKP_BLOCK_MASK));
+ Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ - xlrec->hole_length);
+
/*
* Note: the NEWPAGE log record is used for both heaps and indexes, so do
* not do anything that assumes we are touching a heap.
***************
*** 6598,6605 **** heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
! Assert(record->xl_len == SizeOfHeapNewpage + BLCKSZ);
! memcpy(page, (char *) xlrec + SizeOfHeapNewpage, BLCKSZ);
/*
* The page may be uninitialized. If so, we can't set the LSN because that
--- 6630,6648 ----
LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
page = (Page) BufferGetPage(buffer);
! if (xlrec->hole_length == 0)
! {
! memcpy((char *) page, blk, BLCKSZ);
! }
! else
! {
! memcpy((char *) page, blk, xlrec->hole_offset);
! /* must zero-fill the hole */
! MemSet((char *) page + xlrec->hole_offset, 0, xlrec->hole_length);
! memcpy((char *) page + (xlrec->hole_offset + xlrec->hole_length),
! blk + xlrec->hole_offset,
! BLCKSZ - (xlrec->hole_offset + xlrec->hole_length));
! }
/*
* The page may be uninitialized. If so, we can't set the LSN because that
*** a/src/backend/access/heap/rewriteheap.c
--- b/src/backend/access/heap/rewriteheap.c
***************
*** 277,283 **** end_heap_rewrite(RewriteState state)
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
! state->rs_buffer);
RelationOpenSmgr(state->rs_new_rel);
PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
--- 277,284 ----
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
! state->rs_buffer,
! true);
RelationOpenSmgr(state->rs_new_rel);
PageSetChecksumInplace(state->rs_buffer, state->rs_blockno);
***************
*** 622,628 **** raw_heap_insert(RewriteState state, HeapTuple tup)
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
! page);
/*
* Now write the page. We say isTemp = true even if it's not a
--- 623,630 ----
log_newpage(&state->rs_new_rel->rd_node,
MAIN_FORKNUM,
state->rs_blockno,
! page,
! true);
/*
* Now write the page. We say isTemp = true even if it's not a
*** a/src/backend/access/nbtree/nbtree.c
--- b/src/backend/access/nbtree/nbtree.c
***************
*** 222,228 **** btbuildempty(PG_FUNCTION_ARGS)
(char *) metapage, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! BTREE_METAPAGE, metapage);
/*
* An immediate sync is require even if we xlog'd the page, because the
--- 222,228 ----
(char *) metapage, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! BTREE_METAPAGE, metapage, false);
/*
* An immediate sync is require even if we xlog'd the page, because the
*** a/src/backend/access/nbtree/nbtsort.c
--- b/src/backend/access/nbtree/nbtsort.c
***************
*** 274,280 **** _bt_blwritepage(BTWriteState *wstate, Page page, BlockNumber blkno)
if (wstate->btws_use_wal)
{
/* We use the heap NEWPAGE record type for this */
! log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page);
}
/*
--- 274,280 ----
if (wstate->btws_use_wal)
{
/* We use the heap NEWPAGE record type for this */
! log_newpage(&wstate->index->rd_node, MAIN_FORKNUM, blkno, page, true);
}
/*
*** a/src/backend/access/spgist/spginsert.c
--- b/src/backend/access/spgist/spginsert.c
***************
*** 169,175 **** spgbuildempty(PG_FUNCTION_ARGS)
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_METAPAGE_BLKNO, page);
/* Likewise for the root page. */
SpGistInitPage(page, SPGIST_LEAF);
--- 169,175 ----
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_METAPAGE_BLKNO, page, false);
/* Likewise for the root page. */
SpGistInitPage(page, SPGIST_LEAF);
***************
*** 179,185 **** spgbuildempty(PG_FUNCTION_ARGS)
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_ROOT_BLKNO, page);
/* Likewise for the null-tuples root page. */
SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
--- 179,185 ----
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_ROOT_BLKNO, page, false);
/* Likewise for the null-tuples root page. */
SpGistInitPage(page, SPGIST_LEAF | SPGIST_NULLS);
***************
*** 189,195 **** spgbuildempty(PG_FUNCTION_ARGS)
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_NULL_BLKNO, page);
/*
* An immediate sync is required even if we xlog'd the pages, because the
--- 189,195 ----
(char *) page, true);
if (XLogIsNeeded())
log_newpage(&index->rd_smgr->smgr_rnode.node, INIT_FORKNUM,
! SPGIST_NULL_BLKNO, page, false);
/*
* An immediate sync is required even if we xlog'd the pages, because the
*** a/src/backend/commands/tablecmds.c
--- b/src/backend/commands/tablecmds.c
***************
*** 9132,9138 **** copy_relation_data(SMgrRelation src, SMgrRelation dst,
/* XLOG stuff */
if (use_wal)
! log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page);
PageSetChecksumInplace(page, blkno);
--- 9132,9138 ----
/* XLOG stuff */
if (use_wal)
! log_newpage(&dst->smgr_rnode.node, forkNum, blkno, page, false);
PageSetChecksumInplace(page, blkno);
*** a/src/backend/commands/vacuumlazy.c
--- b/src/backend/commands/vacuumlazy.c
***************
*** 706,712 **** lazy_scan_heap(Relation onerel, LVRelStats *vacrelstats,
*/
if (RelationNeedsWAL(onerel) &&
PageGetLSN(page) == InvalidXLogRecPtr)
! log_newpage_buffer(buf);
PageSetAllVisible(page);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
--- 706,712 ----
*/
if (RelationNeedsWAL(onerel) &&
PageGetLSN(page) == InvalidXLogRecPtr)
! log_newpage_buffer(buf, false);
PageSetAllVisible(page);
visibilitymap_set(onerel, blkno, buf, InvalidXLogRecPtr,
*** a/src/include/access/heapam_xlog.h
--- b/src/include/access/heapam_xlog.h
***************
*** 198,207 **** typedef struct xl_heap_newpage
RelFileNode node;
ForkNumber forknum;
BlockNumber blkno; /* location of new page */
! /* entire page contents follow at end of record */
} xl_heap_newpage;
! #define SizeOfHeapNewpage (offsetof(xl_heap_newpage, blkno) + sizeof(BlockNumber))
/* flags for infobits_set */
#define XLHL_XMAX_IS_MULTI 0x01
--- 198,209 ----
RelFileNode node;
ForkNumber forknum;
BlockNumber blkno; /* location of new page */
! uint16 hole_offset; /* number of bytes before "hole" */
! uint16 hole_length; /* number of bytes in "hole" */
! /* entire page contents (minus the hole) follow at end of record */
} xl_heap_newpage;
! #define SizeOfHeapNewpage (offsetof(xl_heap_newpage, hole_length) + sizeof(uint16))
/* flags for infobits_set */
#define XLHL_XMAX_IS_MULTI 0x01
***************
*** 282,288 **** extern XLogRecPtr log_heap_freeze(Relation reln, Buffer buffer,
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
! BlockNumber blk, Page page);
! extern XLogRecPtr log_newpage_buffer(Buffer buffer);
#endif /* HEAPAM_XLOG_H */
--- 284,290 ----
extern XLogRecPtr log_heap_visible(RelFileNode rnode, Buffer heap_buffer,
Buffer vm_buffer, TransactionId cutoff_xid);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
! BlockNumber blk, Page page, bool page_std);
! extern XLogRecPtr log_newpage_buffer(Buffer buffer, bool page_std);
#endif /* HEAPAM_XLOG_H */