Simple tuning of btree_xlog_vacuum(), using an idea I had a while back but never implemented. The XXX comments have been removed.
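This lets VACUUM replay avoid reading in blocks that it touches only to preserve the correctness of index scans, i.e. only to confirm that they are unpinned; a block that is not in shared_buffers cannot be pinned. Any objections to committing this? -- Simon Riggs www.2ndQuadrant.com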
*** a/src/backend/access/nbtree/nbtxlog.c --- b/src/backend/access/nbtree/nbtxlog.c *************** *** 486,505 **** btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record) for (; blkno < xlrec->block; blkno++) { /* ! * XXX we don't actually need to read the block, we just need to ! * confirm it is unpinned. If we had a special call into the ! * buffer manager we could optimise this so that if the block is ! * not in shared_buffers we confirm it as unpinned. ! * ! * Another simple optimization would be to check if there's any ! * backends running; if not, we could just skip this. */ ! buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL); ! if (BufferIsValid(buffer)) ! { ! LockBufferForCleanup(buffer); ! UnlockReleaseBuffer(buffer); ! } } } --- 486,496 ---- for (; blkno < xlrec->block; blkno++) { /* ! * We don't actually need to read the block, we just need to ! * confirm it is unpinned, since if it's not in shared_buffers then ! * we're OK. */ ! XLogConfirmBufferIsUnpinned(xlrec->node, MAIN_FORKNUM, blkno); } } *** a/src/backend/access/transam/xlogutils.c --- b/src/backend/access/transam/xlogutils.c *************** *** 342,347 **** XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, --- 342,377 ---- return buffer; } + void + XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum, + BlockNumber blkno) + { + BlockNumber lastblock; + SMgrRelation smgr; + + Assert(blkno != P_NEW); + + /* Open the relation at smgr level */ + smgr = smgropen(rnode); + + /* + * Create the target file if it doesn't already exist. This lets us cope + * if the replay sequence contains writes to a relation that is later + * deleted. (The original coding of this routine would instead suppress + * the writes, but that seems like it risks losing valuable data if the + * filesystem loses an inode during a crash. Better to write the data + * until we are actually told to delete the file.) 
+ */ + smgrcreate(smgr, forknum, true); + + lastblock = smgrnblocks(smgr, forknum); + + if (blkno >= lastblock) + return; + + /* page exists in file */ + ConfirmBufferIsUnpinned(rnode, forknum, blkno); + } /* * Struct actually returned by XLogFakeRelcacheEntry, though the declared *** a/src/backend/storage/buffer/bufmgr.c --- b/src/backend/storage/buffer/bufmgr.c *************** *** 475,480 **** ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum, --- 475,520 ---- return BufferDescriptorGetBuffer(bufHdr); } + void + ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum) + { + BufferTag bufTag; /* identity of requested block */ + uint32 bufHash; /* hash value for newTag */ + LWLockId bufPartitionLock; /* buffer partition lock for it */ + int buf_id; + SMgrRelation smgr = smgropen(rnode); + + /* create a tag so we can lookup the buffer */ + INIT_BUFFERTAG(bufTag, smgr->smgr_rnode, forkNum, blockNum); + + /* determine its hash code and partition lock ID */ + bufHash = BufTableHashCode(&bufTag); + bufPartitionLock = BufMappingPartitionLock(bufHash); + + /* see if the block is in the buffer pool already */ + LWLockAcquire(bufPartitionLock, LW_SHARED); + + buf_id = BufTableLookup(&bufTag, bufHash); + + /* + * If buffer isn't present it must be unpinned. + */ + if (buf_id >= 0) + { + volatile BufferDesc *buf; + + buf = &BufferDescriptors[buf_id]; + + /* + * Found it. Now, pin/unpin the buffer to prove it's unpinned. + */ + if (PinBuffer(buf, NULL)) + UnpinBuffer(buf, false); + } + + LWLockRelease(bufPartitionLock); + } + /* * BufferAlloc -- subroutine for ReadBuffer. Handles lookup of a shared * buffer. 
If no buffer exists already, selects a replacement *** a/src/include/access/xlogutils.h --- b/src/include/access/xlogutils.h *************** *** 28,33 **** extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum, --- 28,35 ---- extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init); extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum, BlockNumber blkno, ReadBufferMode mode); + extern void XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum, + BlockNumber blkno); extern Relation CreateFakeRelcacheEntry(RelFileNode rnode); extern void FreeFakeRelcacheEntry(Relation fakerel); *** a/src/include/storage/bufmgr.h --- b/src/include/storage/bufmgr.h *************** *** 163,168 **** extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum, --- 163,170 ---- extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp, ForkNumber forkNum, BlockNumber blockNum, ReadBufferMode mode, BufferAccessStrategy strategy); + extern void ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum, + BlockNumber blockNum); extern void ReleaseBuffer(Buffer buffer); extern void UnlockReleaseBuffer(Buffer buffer); extern void MarkBufferDirty(Buffer buffer);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers