[HACKERS] Hot Standby tuning for btree_xlog_vacuum()

Simon Riggs Thu, 29 Apr 2010 13:12:45 -0700

Simple tuning of btree_xlog_vacuum(), using an idea I had a while back
but never implemented. The XXX comments are removed.


This lets VACUUM replay avoid reading in blocks that it visits only to
preserve the correctness of index scans.

Objections to commit?

-- 
 Simon Riggs           www.2ndQuadrant.com

*** a/src/backend/access/nbtree/nbtxlog.c
--- b/src/backend/access/nbtree/nbtxlog.c
***************
*** 486,505 **** btree_xlog_vacuum(XLogRecPtr lsn, XLogRecord *record)
  		for (; blkno < xlrec->block; blkno++)
  		{
  			/*
! 			 * XXX we don't actually need to read the block, we just need to
! 			 * confirm it is unpinned. If we had a special call into the
! 			 * buffer manager we could optimise this so that if the block is
! 			 * not in shared_buffers we confirm it as unpinned.
! 			 *
! 			 * Another simple optimization would be to check if there's any
! 			 * backends running; if not, we could just skip this.
  			 */
! 			buffer = XLogReadBufferExtended(xlrec->node, MAIN_FORKNUM, blkno, RBM_NORMAL);
! 			if (BufferIsValid(buffer))
! 			{
! 				LockBufferForCleanup(buffer);
! 				UnlockReleaseBuffer(buffer);
! 			}
  		}
  	}
  
--- 486,496 ----
  		for (; blkno < xlrec->block; blkno++)
  		{
  			/*
! 			 * We don't actually need to read the block, we just need to
! 			 * confirm it is unpinned, since if it's not in shared_buffers then
! 			 * we're OK.
  			 */
! 			XLogConfirmBufferIsUnpinned(xlrec->node, MAIN_FORKNUM, blkno);
  		}
  	}
  
*** a/src/backend/access/transam/xlogutils.c
--- b/src/backend/access/transam/xlogutils.c
***************
*** 342,347 **** XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
--- 342,377 ----
  	return buffer;
  }
  
+ void
+ XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ 							BlockNumber blkno)
+ {
+ 	BlockNumber lastblock;
+ 	SMgrRelation smgr;
+ 
+ 	Assert(blkno != P_NEW);
+ 
+ 	/* Open the relation at smgr level */
+ 	smgr = smgropen(rnode);
+ 
+ 	/*
+ 	 * Create the target file if it doesn't already exist, presumably so
+ 	 * that the smgrnblocks() call below is made on an existing fork.
+ 	 * This routine writes no page data itself: it only compares blkno
+ 	 * with the fork's length and hands blocks that lie within the file
+ 	 * to ConfirmBufferIsUnpinned().  NOTE(review): confirm that creating
+ 	 * a missing file is actually wanted on this check-only path.
+ 	 */
+ 	smgrcreate(smgr, forknum, true);
+ 
+ 	lastblock = smgrnblocks(smgr, forknum);
+ 
+ 	if (blkno >= lastblock)
+ 		return;			/* blkno is at or past smgrnblocks(): nothing to check */
+ 
+ 	/* blkno lies within the file; check its shared buffer, if it has one */
+ 	ConfirmBufferIsUnpinned(rnode, forknum, blkno);
+ }
  
  /*
   * Struct actually returned by XLogFakeRelcacheEntry, though the declared
*** a/src/backend/storage/buffer/bufmgr.c
--- b/src/backend/storage/buffer/bufmgr.c
***************
*** 475,480 **** ReadBuffer_common(SMgrRelation smgr, bool isLocalBuf, ForkNumber forkNum,
--- 475,520 ----
  	return BufferDescriptorGetBuffer(bufHdr);
  }
  
+ void
+ ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum, BlockNumber blockNum)
+ {
+ 	BufferTag	bufTag;			/* identity of requested block */
+ 	uint32		bufHash;		/* hash value for bufTag */
+ 	LWLockId	bufPartitionLock;		/* buffer partition lock for it */
+ 	int			buf_id;			/* BufTableLookup() result; negative if no entry */
+ 	SMgrRelation smgr = smgropen(rnode);
+ 
+ 	/* create a tag so we can lookup the buffer */
+ 	INIT_BUFFERTAG(bufTag, smgr->smgr_rnode, forkNum, blockNum);
+ 
+ 	/* determine its hash code and partition lock ID */
+ 	bufHash = BufTableHashCode(&bufTag);
+ 	bufPartitionLock = BufMappingPartitionLock(bufHash);
+ 
+ 	/* look the block up in the buffer table under a shared partition lock */
+ 	LWLockAcquire(bufPartitionLock, LW_SHARED);
+ 
+ 	buf_id = BufTableLookup(&bufTag, bufHash);
+ 
+ 	/*
+ 	 * No buffer-table entry means no pin to worry about; else pin/unpin it.
+ 	 */
+ 	if (buf_id >= 0)
+ 	{
+ 		volatile BufferDesc *buf;
+ 
+ 		buf = &BufferDescriptors[buf_id];
+ 
+ 		/*
+ 		 * NOTE(review): no unpin if PinBuffer() is false; old code used LockBufferForCleanup().
+ 		 */
+ 		if (PinBuffer(buf, NULL))
+ 			UnpinBuffer(buf, false);
+ 	}
+ 
+ 	LWLockRelease(bufPartitionLock);
+ }
+ 
  /*
   * BufferAlloc -- subroutine for ReadBuffer.  Handles lookup of a shared
   *		buffer.  If no buffer exists already, selects a replacement
*** a/src/include/access/xlogutils.h
--- b/src/include/access/xlogutils.h
***************
*** 28,33 **** extern void XLogTruncateRelation(RelFileNode rnode, ForkNumber forkNum,
--- 28,35 ----
  extern Buffer XLogReadBuffer(RelFileNode rnode, BlockNumber blkno, bool init);
  extern Buffer XLogReadBufferExtended(RelFileNode rnode, ForkNumber forknum,
  					   BlockNumber blkno, ReadBufferMode mode);
+ extern void XLogConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forknum,
+ 							BlockNumber blkno);
  
  extern Relation CreateFakeRelcacheEntry(RelFileNode rnode);
  extern void FreeFakeRelcacheEntry(Relation fakerel);
*** a/src/include/storage/bufmgr.h
--- b/src/include/storage/bufmgr.h
***************
*** 163,168 **** extern Buffer ReadBufferExtended(Relation reln, ForkNumber forkNum,
--- 163,170 ----
  extern Buffer ReadBufferWithoutRelcache(RelFileNode rnode, bool isTemp,
  						  ForkNumber forkNum, BlockNumber blockNum,
  						  ReadBufferMode mode, BufferAccessStrategy strategy);
+ extern void ConfirmBufferIsUnpinned(RelFileNode rnode, ForkNumber forkNum,
+ 					BlockNumber blockNum);
  extern void ReleaseBuffer(Buffer buffer);
  extern void UnlockReleaseBuffer(Buffer buffer);
  extern void MarkBufferDirty(Buffer buffer);

-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

[HACKERS] Hot Standby tuning for btree_xlog_vacuum()

Reply via email to