Simple tuning of btree_xlog_vacuum() using an idea I had a while back
but never implemented. The XXX comments describing it have been removed.

This lets VACUUM replay avoid reading in blocks that it does not modify
and that it touches only to keep concurrent index scans correct.

Objections to commit?

 Simon Riggs 
*** a/src/backend/access/nbtree/nbtxlog.c
--- b/src/backend/access/nbtree/nbtxlog.c
*** 486,505 **** btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
  		for (; blkno < xlrec->block; blkno++)
! 			 * XXX we don't actually need to read the block, we just need to
! 			 * confirm it is unpinned. If we had a special call into the
! 			 * buffer manager we could optimise this so that if the block is
! 			 * not in shared_buffers we confirm it as unpinned.
! 			 *
! 			 * Another simple optimization would be to check if there's any
! 			 * backends running; if not, we could just skip this.
! 			buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
! 			if (BufferIsValid(buffer))
! 			{
! 				LockBufferForCleanup(buffer);
! 				UnlockReleaseBuffer(buffer);
! 			}
--- 486,496 ----
  		for (; blkno < xlrec->block; blkno++)
! 			 * We don't actually need to read the block, we just need to
! 			 * confirm it is unpinned, since if it's not in shared_buffers then
! 			 * we're OK.
! 			XLogConfirmBufferIsUnpinned(xlrec->node, MAIN_FORKNUM, blkno);
*** a/src/backend/access/transam/xlogutils.c
--- b/src/backend/access/transam/xlogutils.c
*** 342,347 **** XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
--- 342,377 ----
  	return buffer;
+ /*
+  * XLogConfirmBufferIsUnpinned
+  *		Redo-time front end for ConfirmBufferIsUnpinned().
+  *
+  * Opens the relation at the smgr level, makes sure the requested fork
+  * exists, and then hands the block to the buffer manager for checking.
+  * A block number at or past the current end of the fork is silently
+  * ignored: such a page does not exist in the file, so nothing is done
+  * for it.
+  */
+ void
+ XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ 							BlockNumber blkno)
+ {
+ 	SMgrRelation reln;
+ 	BlockNumber nblocks;
+ 	Assert(blkno != P_NEW);
+ 	/* Get an smgr-level handle on the relation */
+ 	reln = smgropen(rnode);
+ 	/*
+ 	 * Make sure the fork exists before asking for its length.  As in
+ 	 * XLogReadBufferExtended, creating a missing file lets replay cope
+ 	 * with WAL that refers to a relation which is deleted later on.
+ 	 */
+ 	smgrcreate(reln, forknum, true);
+ 	nblocks = smgrnblocks(reln, forknum);
+ 	/* Only pages that actually exist in the file need to be checked */
+ 	if (blkno < nblocks)
+ 		ConfirmBufferIsUnpinned(rnode, forknum, blkno);
+ }
   * Struct actually returned by XLogFakeRelcacheEntry, though the declared
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
*** 475,480 **** ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
--- 475,520 ----
  	return BufferDescriptorGetBuffer(bufHdr);
+ /*
+  * ConfirmBufferIsUnpinned -- wait until no other backend holds a pin on
+  *		the given block, without reading the block in if it is not
+  *		already in shared buffers.
+  *
+  * If the block is not in the buffer pool nobody can have it pinned, so we
+  * return immediately.  Otherwise we pin it ourselves and take (then drop)
+  * a cleanup lock, which does not return until ours is the only pin.
+  *
+  * Merely pinning and unpinning the buffer is not sufficient: that succeeds
+  * even while other backends still hold pins, and so proves nothing.
+  */
+ void
+ ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum)
+ {
+ 	BufferTag	bufTag;			/* identity of requested block */
+ 	uint32		bufHash;		/* hash value for bufTag */
+ 	LWLockId	bufPartitionLock;		/* buffer partition lock for it */
+ 	int			buf_id;
+ 	volatile BufferDesc *buf;
+ 	Buffer		buffer;
+ 	SMgrRelation smgr = smgropen(rnode);
+ 	/* PinBuffer requires that resource owner space is already reserved */
+ 	ResourceOwnerEnlargeBuffers(CurrentResourceOwner);
+ 	/* create a tag so we can lookup the buffer */
+ 	INIT_BUFFERTAG(bufTag, smgr->smgr_rnode, forkNum, blockNum);
+ 	/* determine its hash code and partition lock ID */
+ 	bufHash = BufTableHashCode(&bufTag);
+ 	bufPartitionLock = BufMappingPartitionLock(bufHash);
+ 	/* see if the block is in the buffer pool already */
+ 	LWLockAcquire(bufPartitionLock, LW_SHARED);
+ 	buf_id = BufTableLookup(&bufTag, bufHash);
+ 	/*
+ 	 * If buffer isn't present it must be unpinned.
+ 	 */
+ 	if (buf_id < 0)
+ 	{
+ 		LWLockRelease(bufPartitionLock);
+ 		return;
+ 	}
+ 	buf = &BufferDescriptors[buf_id];
+ 	/*
+ 	 * Found it.  Pin it so it cannot be evicted once we let go of the
+ 	 * mapping lock.  PinBuffer's result only says whether the page is
+ 	 * valid; the pin is taken either way, so it must always be released.
+ 	 */
+ 	(void) PinBuffer(buf, NULL);
+ 	LWLockRelease(bufPartitionLock);
+ 	/*
+ 	 * Wait until we are the sole pinner, then drop the lock and our pin
+ 	 * (which also undoes the resource owner bookkeeping).
+ 	 */
+ 	buffer = BufferDescriptorGetBuffer(buf);
+ 	LockBufferForCleanup(buffer);
+ 	UnlockReleaseBuffer(buffer);
+ }
   * BufferAlloc -- subroutine for ReadBuffer.  Handles lookup of a shared
   *		buffer.  If no buffer exists already, selects a replacement
*** a/src/include/access/xlogutils.h
--- b/src/include/access/xlogutils.h
*** 28,33 **** extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
--- 28,35 ----
  extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
  extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
  					   BlockNumber blkno, ReadBufferMode mode);
+ extern void XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ 							BlockNumber blkno);
  extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
  extern void FreeFakeRelcacheEntry(Relation fakerel);
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
*** 163,168 **** extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
--- 163,170 ----
  extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
  						  ForkNumber forkNum, BlockNumber blockNum,
  						  ReadBufferMode mode, BufferAccessStrategy strategy);
+ extern void ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum,
+ 					BlockNumber blockNum);
  extern void ReleaseBuffer(Buffer buffer);
  extern void UnlockReleaseBuffer(Buffer buffer);
  extern void MarkBufferDirty(Buffer buffer);
Sent via pgsql-hackers mailing list (
To make changes to your subscription:

Reply via email to