Alvaro Herrera wrote:
> Alvaro Herrera wrote:
>
> > Hmm, oh I see another problem here -- the bit is not restored when
> > replaying heap_update's WAL record. I'm now wondering what other bits
> > are set without much care about correctly restoring them during replay.
>
> I'm now wondering whether it'd be easier to just ignore pd_flags in
> calculating the checksum.
Okay, so this is what I've done. pd_flags is skipped. Also the WAL
routine logs both HeapTupleHeader infomasks and ItemId->lp_flags. On
the latter point I'm not 100% sure of the cases where lp_flags must be
logged; right now I'm only logging if the item is marked as "having
storage" (the logic being that if an item does not have storage, then
making it have storage requires a WAL entry, and vice versa).
(This version has some debugging log entries which are obviously only
WIP material.)
--
Alvaro Herrera http://www.CommandPrompt.com/
PostgreSQL Replication, Consulting, Custom Development, 24x7 support
Index: src/backend/access/gist/gistget.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/gist/gistget.c,v
retrieving revision 1.79
diff -c -p -r1.79 gistget.c
*** src/backend/access/gist/gistget.c 22 Oct 2008 12:53:56 -0000 1.79
--- src/backend/access/gist/gistget.c 5 Nov 2008 21:17:40 -0000
*************** killtuple(Relation r, GISTScanOpaque so,
*** 43,48 ****
--- 43,49 ----
/* page unchanged, so all is simple */
offset = ItemPointerGetOffsetNumber(iptr);
ItemIdMarkDead(PageGetItemId(p, offset));
+ PageSetUnloggedChange(p);
SetBufferCommitInfoNeedsSave(so->curbuf);
}
else
*************** killtuple(Relation r, GISTScanOpaque so,
*** 57,62 ****
--- 58,64 ----
{
/* found */
ItemIdMarkDead(PageGetItemId(p, offset));
+ PageSetUnloggedChange(p);
SetBufferCommitInfoNeedsSave(so->curbuf);
break;
}
Index: src/backend/access/hash/hash.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/hash/hash.c,v
retrieving revision 1.106
diff -c -p -r1.106 hash.c
*** src/backend/access/hash/hash.c 17 Oct 2008 23:50:57 -0000 1.106
--- src/backend/access/hash/hash.c 5 Nov 2008 21:17:47 -0000
*************** hashgettuple(PG_FUNCTION_ARGS)
*** 239,244 ****
--- 239,245 ----
offnum = ItemPointerGetOffsetNumber(&(so->hashso_curpos));
page = BufferGetPage(so->hashso_curbuf);
ItemIdMarkDead(PageGetItemId(page, offnum));
+ PageSetUnloggedChange(page);
/*
* Since this can be redone later if needed, it's treated the same
Index: src/backend/access/heap/heapam.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/heap/heapam.c,v
retrieving revision 1.268
diff -c -p -r1.268 heapam.c
*** src/backend/access/heap/heapam.c 31 Oct 2008 19:40:26 -0000 1.268
--- src/backend/access/heap/heapam.c 7 Nov 2008 19:01:57 -0000
*************** log_newpage(RelFileNode *rnode, ForkNumb
*** 4008,4013 ****
--- 4008,4102 ----
}
/*
+ * Perform XLogInsert for hint bits changes in a page. This handles hint
+ * bits set in HeapTupleHeaderData (t_infomask and t_infomask2) as well as
+ * those set in ItemIdData->lp_flags.
+ *
+ * This is intended to be called right before writing a page from shared
+ * buffers to disk.
+ *
+ * The approach used here, instead of WAL-logging every change, is to produce
+ * a complete record of the current state of hint bits in a page just before
+ * flushing it. There are two downsides to this approach: first, it stores
+ * all hint bits in the page, not only those that changed; and second, that
+ * the flusher of the page needs to flush a lot more of the WAL (namely up
+ * to this new record's LSN) than the original LSN marked on the page.
+ */
+ XLogRecPtr
+ log_hintbits(RelFileNode *rnode, ForkNumber forkNum, BlockNumber blkno,
+              Page page)
+ {
+     xl_heap_hintbits xlrec;
+     OffsetNumber i;
+     XLogRecPtr  recptr;
+     XLogRecData rdata[2];
+     char       *bits;
+     int         pos = 0;
+     StringInfoData buf;
+
+     /*
+      * Worst-case size of the hint bit array.  Each entry is
+      * 1 byte for line pointer bits, 2 bytes for infomask,
+      * 2 bytes for infomask2
+      */
+     bits = palloc(MaxHeapTuplesPerPage * (1 + 2 * sizeof(uint16)));
+
+     /* WIP debugging output: textual dump of everything we are logging */
+     initStringInfo(&buf);
+     appendStringInfo(&buf, "page %u: ", blkno);
+
+     for (i = FirstOffsetNumber; i <= PageGetMaxOffsetNumber(page);
+          i = OffsetNumberNext(i))
+     {
+         HeapTupleHeader htup;
+         ItemId      lp = PageGetItemId(page, i);
+         uint16      infomask;
+         uint16      infomask2;
+
+         /* only line pointers that have storage get an entry */
+         if (!ItemIdHasStorage(lp))
+             continue;
+
+         appendStringInfo(&buf, "offset %d: flags %02x ", i, lp->lp_flags);
+
+         bits[pos++] = lp->lp_flags;
+         htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+         /*
+          * The entry is 5 bytes long, so the uint16 fields land on odd
+          * addresses; copy them with memcpy rather than storing through a
+          * misaligned uint16 pointer.  The bytes written are the same
+          * native-endian representation as a direct store.
+          */
+         infomask = htup->t_infomask & HEAP_XACT_MASK;
+         memcpy(bits + pos, &infomask, sizeof(uint16));
+         appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask,
+                          htup->t_infomask & HEAP_XACT_MASK);
+         pos += sizeof(uint16);
+
+         infomask2 = htup->t_infomask2 & HEAP2_XACT_MASK;
+         memcpy(bits + pos, &infomask2, sizeof(uint16));
+         appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2,
+                          htup->t_infomask2 & HEAP2_XACT_MASK);
+         pos += sizeof(uint16);
+     }
+
+     elog(LOG, "%s", buf.data);
+     pfree(buf.data);
+
+     /* NO ELOG(ERROR) from here till hint bits are logged */
+     START_CRIT_SECTION();
+
+     xlrec.node = *rnode;
+     xlrec.block = blkno;
+
+     /* fixed-size record header ... */
+     rdata[0].data = (char *) &xlrec;
+     rdata[0].len = SizeOfHeapHintbits;
+     rdata[0].buffer = InvalidBuffer;
+     rdata[0].next = &(rdata[1]);
+
+     /* ... followed by the variable-length hint bit array */
+     rdata[1].data = (char *) bits;
+     rdata[1].len = pos;
+     rdata[1].buffer = InvalidBuffer;
+     rdata[1].next = NULL;
+
+     recptr = XLogInsert(RM_HEAP2_ID, XLOG_HEAP2_HINTBITS, rdata);
+
+     PageSetLSN(page, recptr);
+     PageSetTLI(page, ThisTimeLineID);
+
+     END_CRIT_SECTION();
+
+     /*
+      * The array is no longer referenced once the record has been inserted;
+      * release it so repeated flushes do not accumulate memory in the
+      * caller's context.
+      */
+     pfree(bits);
+
+     /*
+      * NOTE(review): PD_UNLOGGED_CHANGE is left set on the page here, so the
+      * page will presumably be logged again on its next flush -- confirm
+      * whether and where the flag is meant to be cleared.
+      */
+
+     return recptr;
+ }
+
+ /*
* Handles CLEAN and CLEAN_MOVE record types
*/
static void
*************** heap_xlog_freeze(XLogRecPtr lsn, XLogRec
*** 4125,4130 ****
--- 4214,4302 ----
}
static void
+ heap_xlog_hintbits(XLogRecPtr lsn, XLogRecord *record)
+ {
+     /*
+      * Redo routine for XLOG_HEAP2_HINTBITS: restore the line pointer flags
+      * and the infomask/infomask2 hint bits of every item with storage on
+      * the target page, in offset order, from the array that follows the
+      * fixed-size record header.
+      */
+     xl_heap_hintbits *xlrec = (xl_heap_hintbits *) XLogRecGetData(record);
+     Buffer      buffer;
+     Page        page;
+
+     buffer = XLogReadBuffer(xlrec->node, xlrec->block, false);
+     if (!BufferIsValid(buffer))
+         return;
+     page = (Page) BufferGetPage(buffer);
+
+     /* page already reflects this record (or something later): nothing to do */
+     if (XLByteLE(lsn, PageGetLSN(page)))
+     {
+         UnlockReleaseBuffer(buffer);
+         return;
+     }
+
+     if (record->xl_len > SizeOfHeapHintbits)
+     {
+         char       *bits = (char *) xlrec + SizeOfHeapHintbits;
+         char       *bits_end = (char *) xlrec + record->xl_len;
+         OffsetNumber offset = FirstOffsetNumber;
+         OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
+         StringInfoData buf;
+
+         /* WIP debugging output: textual dump of everything we restore */
+         initStringInfo(&buf);
+         appendStringInfo(&buf, "page %u: ", xlrec->block);
+
+         while (bits < bits_end)
+         {
+             HeapTupleHeader htup;
+             ItemId      lp;
+             uint16      infomask;
+             uint16      infomask2;
+
+             /* each entry is 1 flag byte plus two uint16 masks */
+             if (bits_end - bits < (long) (1 + 2 * sizeof(uint16)))
+                 elog(PANIC, "heap_xlog_hintbits: truncated hint bit array");
+
+             /*
+              * Advance to the next line pointer that has storage, never
+              * stepping past the end of the line pointer array.  Running out
+              * of items while entries remain means the record does not match
+              * the page.
+              */
+             while (offset <= maxoff &&
+                    !ItemIdHasStorage(PageGetItemId(page, offset)))
+                 offset++;
+             if (offset > maxoff)
+                 elog(PANIC, "heap_xlog_hintbits: invalid max offset number");
+             lp = PageGetItemId(page, offset);
+
+             /* set the line pointer flags */
+             lp->lp_flags = *bits;
+             bits++;
+             appendStringInfo(&buf, "offset %d: flags %02x ", offset,
+                              lp->lp_flags);
+
+             htup = (HeapTupleHeader) PageGetItem(page, lp);
+
+             /*
+              * The masks sit at odd addresses inside the record, so fetch
+              * them with memcpy instead of dereferencing a misaligned
+              * uint16 pointer.
+              */
+
+             /* set the right bits in infomask */
+             memcpy(&infomask, bits, sizeof(uint16));
+             htup->t_infomask = infomask |
+                 (htup->t_infomask & ~HEAP_XACT_MASK);
+             appendStringInfo(&buf, "infomask %04x/%04x ", htup->t_infomask,
+                              infomask);
+             bits += sizeof(uint16);
+
+             /* set the right bits in infomask2 */
+             memcpy(&infomask2, bits, sizeof(uint16));
+             htup->t_infomask2 = infomask2 |
+                 (htup->t_infomask2 & ~HEAP2_XACT_MASK);
+             appendStringInfo(&buf, "infomask2 %04x/%04x\n", htup->t_infomask2,
+                              infomask2);
+             bits += sizeof(uint16);
+
+             offset++;
+         }
+         elog(LOG, "%s", buf.data);
+         pfree(buf.data);
+     }
+
+     PageSetLSN(page, lsn);
+     PageSetTLI(page, ThisTimeLineID);
+     MarkBufferDirty(buffer);
+     UnlockReleaseBuffer(buffer);
+ }
+
+ static void
heap_xlog_newpage(XLogRecPtr lsn, XLogRecord *record)
{
xl_heap_newpage *xlrec = (xl_heap_newpage *) XLogRecGetData(record);
*************** heap_xlog_update(XLogRecPtr lsn, XLogRec
*** 4389,4394 ****
--- 4561,4568 ----
*/
if (samepage)
goto newsame;
+ if (!hot_update && !move)
+ PageSetFull(page);
PageSetLSN(page, lsn);
PageSetTLI(page, ThisTimeLineID);
MarkBufferDirty(buffer);
*************** heap2_redo(XLogRecPtr lsn, XLogRecord *r
*** 4664,4669 ****
--- 4838,4846 ----
case XLOG_HEAP2_CLEAN_MOVE:
heap_xlog_clean(lsn, record, true);
break;
+ case XLOG_HEAP2_HINTBITS:
+ heap_xlog_hintbits(lsn, record);
+ break;
default:
elog(PANIC, "heap2_redo: unknown op code %u", info);
}
*************** heap2_desc(StringInfo buf, uint8 xl_info
*** 4805,4810 ****
--- 4982,4995 ----
xlrec->node.spcNode, xlrec->node.dbNode,
xlrec->node.relNode, xlrec->block);
}
+ else if (info == XLOG_HEAP2_HINTBITS)
+ {
+ xl_heap_hintbits *xlrec = (xl_heap_hintbits *) rec;
+
+ appendStringInfo(buf, "hintbits: rel %u/%u/%u; blk %u",
+ xlrec->node.spcNode, xlrec->node.dbNode,
+ xlrec->node.relNode, xlrec->block);
+ }
else
appendStringInfo(buf, "UNKNOWN");
}
Index: src/backend/access/nbtree/nbtinsert.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/nbtree/nbtinsert.c,v
retrieving revision 1.168
diff -c -p -r1.168 nbtinsert.c
*** src/backend/access/nbtree/nbtinsert.c 3 Nov 2008 20:47:48 -0000 1.168
--- src/backend/access/nbtree/nbtinsert.c 5 Nov 2008 21:18:28 -0000
*************** _bt_check_unique(Relation rel, IndexTupl
*** 308,313 ****
--- 308,314 ----
* killed.
*/
ItemIdMarkDead(curitemid);
+ PageSetUnloggedChange(page);
opaque->btpo_flags |= BTP_HAS_GARBAGE;
/* be sure to mark the proper buffer dirty... */
if (nbuf != InvalidBuffer)
Index: src/backend/access/nbtree/nbtutils.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/access/nbtree/nbtutils.c,v
retrieving revision 1.91
diff -c -p -r1.91 nbtutils.c
*** src/backend/access/nbtree/nbtutils.c 19 Jun 2008 00:46:03 -0000 1.91
--- src/backend/access/nbtree/nbtutils.c 5 Nov 2008 21:20:11 -0000
*************** _bt_killitems(IndexScanDesc scan, bool h
*** 1153,1158 ****
--- 1153,1159 ----
{
/* found the item */
ItemIdMarkDead(iid);
+ PageSetUnloggedChange(page);
killedsomething = true;
break; /* out of inner search loop */
}
Index: src/backend/storage/buffer/bufmgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/buffer/bufmgr.c,v
retrieving revision 1.240
diff -c -p -r1.240 bufmgr.c
*** src/backend/storage/buffer/bufmgr.c 31 Oct 2008 15:05:00 -0000 1.240
--- src/backend/storage/buffer/bufmgr.c 7 Nov 2008 19:03:51 -0000
***************
*** 33,38 ****
--- 33,39 ----
#include <sys/file.h>
#include <unistd.h>
+ #include "access/heapam.h"
#include "miscadmin.h"
#include "pg_trace.h"
#include "pgstat.h"
***************
*** 42,47 ****
--- 43,49 ----
#include "storage/ipc.h"
#include "storage/proc.h"
#include "storage/smgr.h"
+ #include "utils/memutils.h"
#include "utils/rel.h"
#include "utils/resowner.h"
*************** BgBufferSync(void)
*** 1464,1470 ****
* BUF_REUSABLE: buffer is available for replacement, ie, it has
* pin count 0 and usage count 0.
*
! * (BUF_WRITTEN could be set in error if FlushBuffers finds the buffer clean
* after locking it, but we don't care all that much.)
*
* Note: caller must have done ResourceOwnerEnlargeBuffers.
--- 1466,1472 ----
* BUF_REUSABLE: buffer is available for replacement, ie, it has
* pin count 0 and usage count 0.
*
! * (BUF_WRITTEN could be set in error if FlushBuffer finds the buffer clean
* after locking it, but we don't care all that much.)
*
* Note: caller must have done ResourceOwnerEnlargeBuffers.
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1774,1779 ****
--- 1776,1789 ----
{
XLogRecPtr recptr;
ErrorContextCallback errcontext;
+ static char *dblbuf = NULL;
+ bool done = false;
+
+ if (enable_block_checksums && dblbuf == NULL)
+ {
+ dblbuf = MemoryContextAlloc(TopMemoryContext, BLCKSZ + ALIGNOF_BUFFER);
+ dblbuf = (char *) BUFFERALIGN(dblbuf);
+ }
/*
* Acquire the buffer's io_in_progress lock. If StartBufferIO returns
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1798,1803 ****
--- 1808,1837 ----
reln->smgr_rnode.relNode);
/*
+ * We make a copy of the buffer to write.
+ */
+ if (enable_block_checksums)
+ memcpy(dblbuf, BufHdrGetBlock(buf), BLCKSZ);
+
+ /*
+ * If the page has been modified by a hint bit setter, ensure we WAL-log
+ * their changes before actually writing the page; otherwise the CRC we're
+ * about to store could be invalid if the page is torn. Note: we check
+ * the flag on the shared-memory copy of the buffer, not the private copy
+ * we just made, to forestall the possibility that hint bits could have
+ * been set in the later parts of the page after we copied the flag in
+ * unset state.
+ */
+ if (enable_block_checksums && PageHasUnloggedChange(BufHdrGetBlock(buf)) &&
+ !InRecovery)
+ {
+ /* XXX cast away the "volatile" qualifier */
+ log_hintbits(&((BufferDesc *) buf)->tag.rnode, buf->tag.forkNum,
+ buf->tag.blockNum, BufHdrGetBlock(buf));
+ done = true;
+ }
+
+ /*
* Force XLOG flush up to buffer's LSN. This implements the basic WAL
* rule that log updates must hit disk before any of the data-file changes
* they describe do.
*************** FlushBuffer(volatile BufferDesc *buf, SM
*** 1819,1825 ****
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
! (char *) BufHdrGetBlock(buf),
false);
BufferFlushCount++;
--- 1853,1859 ----
smgrwrite(reln,
buf->tag.forkNum,
buf->tag.blockNum,
! enable_block_checksums ? dblbuf : BufHdrGetBlock(buf),
false);
BufferFlushCount++;
Index: src/backend/storage/page/bufpage.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/page/bufpage.c,v
retrieving revision 1.81
diff -c -p -r1.81 bufpage.c
*** src/backend/storage/page/bufpage.c 3 Nov 2008 20:47:48 -0000 1.81
--- src/backend/storage/page/bufpage.c 3 Nov 2008 22:37:02 -0000
*************** PageInit(Page page, Size pageSize, Size
*** 41,46 ****
--- 41,47 ----
MemSet(p, 0, pageSize);
/* p->pd_flags = 0; done by above MemSet */
+ p->pd_checksum = PAGE_INVALID_CHECKSUM;
p->pd_lower = SizeOfPageHeaderData;
p->pd_upper = pageSize - specialSize;
p->pd_special = pageSize - specialSize;
*************** PageHeaderIsValid(PageHeader page)
*** 84,92 ****
page->pd_special == MAXALIGN(page->pd_special))
return true;
! /* Check all-zeroes case */
pagebytes = (char *) page;
! for (i = 0; i < BLCKSZ; i++)
{
if (pagebytes[i] != 0)
return false;
--- 85,93 ----
page->pd_special == MAXALIGN(page->pd_special))
return true;
! /* Check all-zeroes case (skipping the checksum) */
pagebytes = (char *) page;
! for (i = sizeof(PAGE_CHECKSUM_TYPE); i < BLCKSZ; i++)
{
if (pagebytes[i] != 0)
return false;
Index: src/backend/storage/smgr/smgr.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/storage/smgr/smgr.c,v
retrieving revision 1.112
diff -c -p -r1.112 smgr.c
*** src/backend/storage/smgr/smgr.c 30 Sep 2008 10:52:13 -0000 1.112
--- src/backend/storage/smgr/smgr.c 3 Nov 2008 22:37:02 -0000
***************
*** 27,32 ****
--- 27,35 ----
#include "utils/memutils.h"
+ /* Perform block checksumming for corruption detection */
+ bool enable_block_checksums = false;
+
/*
* This struct of function pointers defines the API between smgr.c and
* any individual storage manager module. Note that smgr subfunctions are
*************** void
*** 503,508 ****
--- 506,515 ----
smgrextend(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{
+ /* Perform block checksumming for corruption detection */
+ if (enable_block_checksums)
+ WritePageChecksum(buffer);
+
(*(smgrsw[reln->smgr_which].smgr_extend)) (reln, forknum, blocknum,
buffer, isTemp);
}
*************** smgrread(SMgrRelation reln, ForkNumber f
*** 520,525 ****
--- 527,551 ----
char *buffer)
{
(*(smgrsw[reln->smgr_which].smgr_read)) (reln, forknum, blocknum, buffer);
+
+ /* Perform block checksumming for corruption detection */
+ if (enable_block_checksums && PageGetChecksum(buffer) != PAGE_INVALID_CHECKSUM)
+ {
+ PAGE_CHECKSUM_TYPE chksum;
+
+ CalcPageChecksum(buffer, chksum);
+
+ if (chksum != PageGetChecksum(buffer))
+ {
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid checksum on read of block %u of relation %u/%u/%u",
+ blocknum,
+ reln->smgr_rnode.spcNode,
+ reln->smgr_rnode.dbNode,
+ reln->smgr_rnode.relNode)));
+ }
+ }
}
/*
*************** void
*** 541,546 ****
--- 567,578 ----
smgrwrite(SMgrRelation reln, ForkNumber forknum, BlockNumber blocknum,
char *buffer, bool isTemp)
{
+ /*
+ * Perform block checksumming before writing.
+ */
+ if (enable_block_checksums)
+ WritePageChecksum(buffer);
+
(*(smgrsw[reln->smgr_which].smgr_write)) (reln, forknum, blocknum,
buffer, isTemp);
}
Index: src/backend/utils/misc/guc.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/guc.c,v
retrieving revision 1.475
diff -c -p -r1.475 guc.c
*** src/backend/utils/misc/guc.c 6 Oct 2008 13:05:36 -0000 1.475
--- src/backend/utils/misc/guc.c 3 Nov 2008 22:37:02 -0000
***************
*** 57,62 ****
--- 57,63 ----
#include "regex/regex.h"
#include "storage/bufmgr.h"
#include "storage/fd.h"
+ #include "storage/smgr.h"
#include "tcop/tcopprot.h"
#include "tsearch/ts_cache.h"
#include "utils/builtins.h"
*************** static struct config_bool ConfigureNames
*** 762,767 ****
--- 763,778 ----
false, NULL, NULL
},
{
+ {"perform_checksum", PGC_SIGHUP, UNGROUPED,
+ gettext_noop("Forces checksumming of blocks to/from disk."),
+ gettext_noop("The server will perform a checksum on the block "
+ "when read from or written to disk in order to detect storage-related "
+ "corruption.")
+ },
+ &enable_block_checksums,
+ false, NULL, NULL
+ },
+ {
{"log_duration", PGC_SUSET, LOGGING_WHAT,
gettext_noop("Logs the duration of each completed SQL statement."),
NULL
Index: src/backend/utils/misc/postgresql.conf.sample
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/misc/postgresql.conf.sample,v
retrieving revision 1.246
diff -c -p -r1.246 postgresql.conf.sample
*** src/backend/utils/misc/postgresql.conf.sample 30 Sep 2008 10:52:13 -0000 1.246
--- src/backend/utils/misc/postgresql.conf.sample 3 Nov 2008 22:37:02 -0000
***************
*** 480,485 ****
--- 480,490 ----
#transform_null_equals = off
+ #------------------------------------------------------------------------------
+ # CORRUPTION DETECTION
+ #------------------------------------------------------------------------------
+
+ #perform_checksum = off # Perform block checksumming to/from disk
#------------------------------------------------------------------------------
# CUSTOMIZED OPTIONS
Index: src/backend/utils/time/tqual.c
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/backend/utils/time/tqual.c,v
retrieving revision 1.110
diff -c -p -r1.110 tqual.c
*** src/backend/utils/time/tqual.c 26 Mar 2008 16:20:47 -0000 1.110
--- src/backend/utils/time/tqual.c 3 Nov 2008 22:37:02 -0000
***************
*** 44,49 ****
--- 44,50 ----
#include "access/xact.h"
#include "storage/bufmgr.h"
#include "storage/procarray.h"
+ #include "storage/smgr.h"
#include "utils/tqual.h"
*************** SetHintBits(HeapTupleHeader tuple, Buffe
*** 96,101 ****
--- 97,104 ----
}
tuple->t_infomask |= infomask;
+ if (enable_block_checksums)
+ PageSetUnloggedChange(BufferGetPage(buffer));
SetBufferCommitInfoNeedsSave(buffer);
}
Index: src/include/pg_config_manual.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/pg_config_manual.h,v
retrieving revision 1.35
diff -c -p -r1.35 pg_config_manual.h
*** src/include/pg_config_manual.h 12 Jul 2008 02:28:43 -0000 1.35
--- src/include/pg_config_manual.h 3 Nov 2008 22:37:02 -0000
***************
*** 195,201 ****
* Enable debugging print statements for WAL-related operations; see
* also the wal_debug GUC var.
*/
! /* #define WAL_DEBUG */
/*
* Enable tracing of resource consumption during sort operations;
--- 195,201 ----
* Enable debugging print statements for WAL-related operations; see
* also the wal_debug GUC var.
*/
! #define WAL_DEBUG 1
/*
* Enable tracing of resource consumption during sort operations;
Index: src/include/access/heapam.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/heapam.h,v
retrieving revision 1.139
diff -c -p -r1.139 heapam.h
*** src/include/access/heapam.h 8 Oct 2008 01:14:44 -0000 1.139
--- src/include/access/heapam.h 3 Nov 2008 22:37:02 -0000
*************** extern XLogRecPtr log_heap_freeze(Relati
*** 131,136 ****
--- 131,138 ----
OffsetNumber *offsets, int offcnt);
extern XLogRecPtr log_newpage(RelFileNode *rnode, ForkNumber forkNum,
BlockNumber blk, Page page);
+ extern XLogRecPtr log_hintbits(RelFileNode *rnode, ForkNumber forkNum,
+ BlockNumber blk, Page page);
/* in heap/pruneheap.c */
extern void heap_page_prune_opt(Relation relation, Buffer buffer,
Index: src/include/access/htup.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/access/htup.h,v
retrieving revision 1.103
diff -c -p -r1.103 htup.h
*** src/include/access/htup.h 2 Nov 2008 01:45:28 -0000 1.103
--- src/include/access/htup.h 3 Nov 2008 22:37:02 -0000
*************** typedef HeapTupleData *HeapTuple;
*** 580,585 ****
--- 580,586 ----
#define XLOG_HEAP2_FREEZE 0x00
#define XLOG_HEAP2_CLEAN 0x10
#define XLOG_HEAP2_CLEAN_MOVE 0x20
+ #define XLOG_HEAP2_HINTBITS 0x30
/*
* All what we need to find changed tuple
*************** typedef struct xl_heap_freeze
*** 714,719 ****
--- 715,730 ----
#define SizeOfHeapFreeze (offsetof(xl_heap_freeze, cutoff_xid) + sizeof(TransactionId))
+ /*
+ * This is what we need to know about hint bits: the WAL record emitted by
+ * log_hintbits() and replayed by heap_xlog_hintbits().  It identifies the
+ * page whose hint bits are being recorded.
+ */
+ typedef struct xl_heap_hintbits
+ {
+ RelFileNode node;
+ BlockNumber block;
+ /*
+ * HINT BIT ARRAY FOLLOWS AT THE END
+ *
+ * One entry per line pointer that has storage, in offset order: 1 byte of
+ * lp_flags, then 2 bytes of t_infomask & HEAP_XACT_MASK, then 2 bytes of
+ * t_infomask2 & HEAP2_XACT_MASK.
+ */
+ } xl_heap_hintbits;
+
+ #define SizeOfHeapHintbits (offsetof(xl_heap_hintbits, block) + sizeof(BlockNumber))
+
/* HeapTupleHeader functions implemented in utils/time/combocid.c */
extern CommandId HeapTupleHeaderGetCmin(HeapTupleHeader tup);
extern CommandId HeapTupleHeaderGetCmax(HeapTupleHeader tup);
Index: src/include/storage/bufpage.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/bufpage.h,v
retrieving revision 1.84
diff -c -p -r1.84 bufpage.h
*** src/include/storage/bufpage.h 3 Nov 2008 20:47:49 -0000 1.84
--- src/include/storage/bufpage.h 3 Nov 2008 22:37:02 -0000
***************
*** 17,22 ****
--- 17,23 ----
#include "access/xlogdefs.h"
#include "storage/item.h"
#include "storage/off.h"
+ #include "utils/pg_crc.h"
/*
* A postgres disk page is an abstraction layered on top of a postgres
*************** typedef uint16 LocationIndex;
*** 87,92 ****
--- 88,94 ----
*
* space management information generic to any page
*
+ * pd_checksum - the checksum of the page
* pd_lsn - identifies xlog record for last change to this page.
* pd_tli - ditto.
* pd_flags - flag bits.
*************** typedef uint16 LocationIndex;
*** 118,136 ****
* the constraint on pagesize mod 256 is not an important restriction.
* On the high end, we can only support pages up to 32KB because lp_off/lp_len
* are 15 bits.
*/
typedef struct PageHeaderData
{
! /* XXX LSN is member of *any* block, not only page-organized ones */
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
* record for last change to this page */
- uint16 pd_tli; /* least significant bits of the TimeLineID
- * containing the LSN */
- uint16 pd_flags; /* flag bits, see below */
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
ItemIdData pd_linp[1]; /* beginning of line pointer array */
} PageHeaderData;
--- 120,143 ----
* the constraint on pagesize mod 256 is not an important restriction.
* On the high end, we can only support pages up to 32KB because lp_off/lp_len
* are 15 bits.
+ *
+ * Note that pd_tli appears in a rather awkward position in the struct;
+ * this is because we moved it to accommodate pd_checksum without changing
+ * pd_pagesize_version's offset.
*/
typedef struct PageHeaderData
{
! /* XXX CRC & LSN are members of *any* block, not only page-organized ones */
! pg_crc32 pd_checksum; /* The block-level checksum */
XLogRecPtr pd_lsn; /* LSN: next byte after last byte of xlog
* record for last change to this page */
LocationIndex pd_lower; /* offset to start of free space */
LocationIndex pd_upper; /* offset to end of free space */
LocationIndex pd_special; /* offset to start of special space */
uint16 pd_pagesize_version;
+ uint16 pd_tli; /* least significant bits of the TimeLineID
+ * containing the LSN */
+ uint16 pd_flags; /* flag bits, see below */
TransactionId pd_prune_xid; /* oldest prunable XID, or zero if none */
ItemIdData pd_linp[1]; /* beginning of line pointer array */
} PageHeaderData;
*************** typedef PageHeaderData *PageHeader;
*** 148,159 ****
* PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
* page for its new tuple version; this suggests that a prune is needed.
* Again, this is just a hint.
*/
#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
#define PD_PAGE_FULL 0x0002 /* not enough free space for new
* tuple? */
! #define PD_VALID_FLAG_BITS 0x0003 /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
--- 155,172 ----
* PD_PAGE_FULL is set if an UPDATE doesn't find enough free space in the
* page for its new tuple version; this suggests that a prune is needed.
* Again, this is just a hint.
+ *
+ * PD_UNLOGGED_CHANGE indicates whether a process has set hint bits on the
+ * page. This is used to determine whether a WAL message needs to be emitted
+ * before writing the page to disk when page checksums are enabled.
*/
#define PD_HAS_FREE_LINES 0x0001 /* are there any unused line pointers? */
#define PD_PAGE_FULL 0x0002 /* not enough free space for new
* tuple? */
+ #define PD_UNLOGGED_CHANGE 0x0004 /* does the page have unlogged hint
+ bits? */
! #define PD_VALID_FLAG_BITS 0x0007 /* OR of all valid pd_flags bits */
/*
* Page layout version number 0 is for pre-7.3 Postgres releases.
*************** typedef PageHeaderData *PageHeader;
*** 163,170 ****
* Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
* added the pd_flags field (by stealing some bits from pd_tli),
* as well as adding the pd_prune_xid field (which enlarges the header).
*/
! #define PG_PAGE_LAYOUT_VERSION 4
/* ----------------------------------------------------------------
--- 176,186 ----
* Release 8.3 uses 4; it changed the HeapTupleHeader layout again, and
* added the pd_flags field (by stealing some bits from pd_tli),
* as well as adding the pd_prune_xid field (which enlarges the header).
+ * Release 8.4 uses 5; it added a checksum to the page header, and moved
+ * pd_tli and pd_flags so that the page version would keep the same
+ * offset.
*/
! #define PG_PAGE_LAYOUT_VERSION 5
/* ----------------------------------------------------------------
*************** do { \
*** 352,357 ****
--- 368,410 ----
#define PageClearPrunable(page) \
(((PageHeader) (page))->pd_prune_xid = InvalidTransactionId)
+ /* ----------------------------------------------------------------
+  *		CRC support
+  *
+  * The checksum lives in pd_checksum, the first member of the page
+  * header.  It covers the whole block except pd_checksum itself and
+  * pd_flags.
+  * ----------------------------------------------------------------
+  */
+ #define PAGE_CHECKSUM_TYPE pg_crc32
+ #define SIZEOF_PAGE_CHECKSUM sizeof(PAGE_CHECKSUM_TYPE)
+ /*
+  * Marker stored by PageInit; smgrread does not verify a page whose stored
+  * checksum equals this value.
+  */
+ #define PAGE_INVALID_CHECKSUM 0xb79a6e9c
+
+ /*
+  * Compute the checksum of the block at "buffer" into the lvalue "sum".
+  *
+  * The CRC is fed in two pieces: from just past pd_checksum up to pd_flags,
+  * and from just past pd_flags to the end of the block.  "buffer" may be
+  * any pointer expression; it is evaluated exactly once and treated as a
+  * byte pointer.
+  */
+ #define CalcPageChecksum(buffer, sum) \
+ do { \
+ 	char *cpc_page_ = (char *) (buffer); \
+ 	INIT_CRC32(sum); \
+ 	COMP_CRC32(sum, cpc_page_ + SIZEOF_PAGE_CHECKSUM, \
+ 			   offsetof(PageHeaderData, pd_flags) - SIZEOF_PAGE_CHECKSUM); \
+ 	COMP_CRC32(sum, cpc_page_ + offsetof(PageHeaderData, pd_flags) + sizeof(uint16), \
+ 			   BLCKSZ - (offsetof(PageHeaderData, pd_flags) + sizeof(uint16))); \
+ 	FIN_CRC32(sum); \
+ } while (0)
+
+ /*
+  * Compute the checksum of the block at "buffer" and store it in the
+  * block's pd_checksum.  "buffer" is evaluated exactly once.
+  */
+ #define WritePageChecksum(buffer) \
+ do { \
+ 	char *wpc_page_ = (char *) (buffer); \
+ 	PAGE_CHECKSUM_TYPE wpc_sum_; \
+ 	CalcPageChecksum(wpc_page_, wpc_sum_); \
+ 	PageSetChecksum(wpc_page_, wpc_sum_); \
+ } while (0)
+
+ /* Read or overwrite the checksum stored in the page header */
+ #define PageGetChecksum(page) \
+ 	(((PageHeader) (page))->pd_checksum)
+ #define PageSetChecksum(page, checksum) \
+ 	(((PageHeader) (page))->pd_checksum = (checksum))
+
+ /* Test, set or clear the PD_UNLOGGED_CHANGE bit in pd_flags */
+ #define PageHasUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags & PD_UNLOGGED_CHANGE)
+ #define PageSetUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags |= PD_UNLOGGED_CHANGE)
+ #define PageClearUnloggedChange(page) \
+ 	(((PageHeader) (page))->pd_flags &= ~PD_UNLOGGED_CHANGE)
/* ----------------------------------------------------------------
* extern declarations
Index: src/include/storage/smgr.h
===================================================================
RCS file: /home/alvherre/Code/cvs/pgsql/src/include/storage/smgr.h,v
retrieving revision 1.63
diff -c -p -r1.63 smgr.h
*** src/include/storage/smgr.h 11 Aug 2008 11:05:11 -0000 1.63
--- src/include/storage/smgr.h 3 Nov 2008 22:37:02 -0000
***************
*** 20,25 ****
--- 20,28 ----
#include "storage/relfilenode.h"
+ /*
+  * Perform block checksumming for corruption detection.
+  *
+  * This is only a declaration; the variable itself is defined (and
+  * initialized) in smgr.c.  Defining it here would create a separate
+  * definition in every file that includes this header.
+  */
+ extern bool enable_block_checksums;
+
/*
* smgr.c maintains a table of SMgrRelation objects, which are essentially
* cached file handles. An SMgrRelation is created (if not already present)
--
Sent via pgsql-hackers mailing list ([email protected])
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers