From 4b051a1aae7ad344269ca81322b956e24158a881 Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Fri, 11 Sep 2020 13:00:33 +0000
Subject: [PATCH] Optimize DropRelFileNodeBuffers() during recovery.

The recovery path of DropRelFileNodeBuffers() is optimized so that
scanning of the whole buffer pool is avoided when the relation
is small enough, or the total number of blocks to be invalidated
is below the threshold of full scanning.

During recovery, we can get a reliable cached value of nblocks for
the supplied relation's fork by calling smgrnblocks(), and it's safe
because there are no other processes but the startup process that
change the relation size during recovery.  Otherwise, or if not in
recovery, proceed to sequential search of the whole buffer pool.
---
 src/backend/storage/buffer/bufmgr.c | 125 ++++++++++++++++++++++++++++++++++--
 src/backend/storage/smgr/smgr.c     |   2 +-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 120 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index a2a963b..7c2c196 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULLSCAN_THRESHOLD		((uint32) (NBuffers / 500))	/* per-fork block count at which we full-scan */
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -473,6 +475,8 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+										 BlockNumber firstDelBlock);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -2972,18 +2976,27 @@ BufferGetLSNAtomic(Buffer buffer)
  *		that no other process could be trying to load more pages of the
  *		relation into buffers.
  *
- *		XXX currently it sequentially searches the buffer pool, should be
- *		changed to more clever ways of searching.  However, this routine
- *		is used only in code paths that aren't very performance-critical,
- *		and we shouldn't slow down the hot paths to make it faster ...
+ *		XXX The relation might have been extended before this, so this
+ *		path is only optimized during recovery, when we can get a reliable
+ *		cached value of nblocks for the specified relation.  See comment in
+ *		smgrnblocks() in smgr.c.  In addition, it is safe to do this since
+ *		there are no other processes but the startup process that change
+ *		the relation size during recovery.  Otherwise, or if not in
+ *		recovery, proceed to the usual invalidation process, which
+ *		sequentially searches the buffer pool.
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
+	RelFileNodeBackend	rnode;
+	BufferDesc	*bufHdr;
+	uint32		buf_state;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
@@ -2997,10 +3010,77 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 		return;
 	}
 
+	if (InRecovery)
+	{
+		for (j = 0; j < nforks; j++)
+		{
+			BlockNumber	nTotalBlocks; /* total nblocks */
+			BlockNumber nBlocksToInvalidate; /* total nblocks to be invalidated */
+
+			/* Get the total number of blocks for the supplied relation's fork */
+			nTotalBlocks = smgrnblocks(smgr_reln, forkNum[j]);
+
+			/* Get the total number of blocks to be invalidated for the specified fork */
+			nBlocksToInvalidate = nTotalBlocks - firstDelBlock[j];
+
+			/*
+			 * Do explicit hashtable probe iff the ratio of total number of buffers to be
+			 * truncated against NBuffers is less than the threshold for full-scanning of
+			 * buffer pool. IOW, relation is small enough for its buffers to be removed.
+			 */
+			if (nBlocksToInvalidate < BUF_DROP_FULLSCAN_THRESHOLD)
+			{
+				BlockNumber		curBlock;
+
+				for (curBlock = firstDelBlock[j]; curBlock < nTotalBlocks; curBlock++)
+				{
+					uint32		newHash;		/* hash value for newTag */
+					BufferTag	newTag;			/* identity of requested block */
+					LWLock	   	*newPartitionLock;	/* buffer partition lock for it */
+					int		buf_id;
+
+					/* create a tag so we can lookup the buffer */
+					INIT_BUFFERTAG(newTag, rnode.node, forkNum[j], curBlock);
+
+					/* determine its hash code and partition lock ID */
+					newHash = BufTableHashCode(&newTag);
+					newPartitionLock = BufMappingPartitionLock(newHash);
+
+					/* Check that it is in the buffer pool. If not, do nothing */
+					LWLockAcquire(newPartitionLock, LW_SHARED);
+					buf_id = BufTableLookup(&newTag, newHash);
+					LWLockRelease(newPartitionLock);
+
+					if (buf_id < 0)
+						continue;
+
+					bufHdr = GetBufferDescriptor(buf_id);
+
+					buf_state = LockBufHdr(bufHdr);
+
+					if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
+						bufHdr->tag.forkNum == forkNum[j] &&
+						bufHdr->tag.blockNum == curBlock)
+						InvalidateBuffer(bufHdr);	/* releases spinlock */
+					else
+						UnlockBufHdr(bufHdr, buf_state);
+				}
+			}
+			else
+			{
+				/*
+				 * Relation is larger than the threshold. Execute full scan of
+				 * buffer pool for each fork.
+				 */
+				DropRelFileNodeBuffersOfFork(rnode.node, forkNum[j],
+											 firstDelBlock[j]);
+			}
+		}
+		return;
+	}
 	for (i = 0; i < NBuffers; i++)
 	{
-		BufferDesc *bufHdr = GetBufferDescriptor(i);
-		uint32		buf_state;
+		bufHdr = GetBufferDescriptor(i);
 
 		/*
 		 * We can make this a tad faster by prechecking the buffer tag before
@@ -3038,6 +3118,37 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 	}
 }
 
+/* -----------------------------------------------------------------
+ *		DropRelFileNodeBuffersOfFork
+ *
+ *		Scan the whole buffer pool and invalidate every buffer holding a
+ *		block >= firstDelBlock of the specified relation's fork.
+ * -----------------------------------------------------------------
+ */
+static void
+DropRelFileNodeBuffersOfFork(RelFileNode rnode, ForkNumber forkNum,
+							 BlockNumber firstDelBlock)
+{
+	int			i;
+
+	for (i = 0; i < NBuffers; i++)
+	{
+		BufferDesc	*bufHdr = GetBufferDescriptor(i);
+		uint32		buf_state;
+
+		/* Unlocked precheck avoids most header locks; rechecked under lock. */
+		if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode))
+			continue;
+		buf_state = LockBufHdr(bufHdr);
+
+		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+			bufHdr->tag.forkNum == forkNum &&
+			bufHdr->tag.blockNum >= firstDelBlock)
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		else
+			UnlockBufHdr(bufHdr, buf_state);
+	}
+
 /* ---------------------------------------------------------------------
  *		DropRelFileNodesAllBuffers
  *
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index dcc09df..5238c6c 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -583,7 +583,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1

