From a0169fd760a58543417aadbc880ae1ab6e7dc376 Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Tue, 10 Nov 2020 06:27:11 +0000
Subject: [PATCH v31 3/4] Optimize DropRelFileNodeBuffers() during recovery.

The recovery path of DropRelFileNodeBuffers() is optimized so that
scanning of the whole buffer pool is avoided when the relation
is small enough, or the total number of blocks to be invalidated
is below the threshold of full scanning. This improves
performance especially when VACUUM or autovacuum has truncated off
empty pages at the end of a relation.

During recovery, we can get a reliable cached value of nblocks for
the supplied relation's fork, and it's safe because no process
other than the startup process changes the relation size
during recovery.  Otherwise, or if not in recovery, proceed to a
sequential search of the whole buffer pool.
---
 src/backend/storage/buffer/bufmgr.c | 115 +++++++++++++++++++++++++++++++++---
 src/backend/storage/smgr/smgr.c     |   2 +-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 110 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1680bf4..67284e3 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULL_SCAN_THRESHOLD		((uint32) (NBuffers / 256))
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -473,6 +475,11 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FindAndDropRelFileNodeBuffers(RelFileNode rnode,
+										  ForkNumber *forkNum,
+										  int nforks,
+										  BlockNumber *nForkBlocks,
+										  BlockNumber *firstDelBlock);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -2967,18 +2974,25 @@ BufferGetLSNAtomic(Buffer buffer)
  *		that no other process could be trying to load more pages of the
  *		relation into buffers.
  *
- *		XXX currently it sequentially searches the buffer pool, should be
- *		changed to more clever ways of searching.  However, this routine
- *		is used only in code paths that aren't very performance-critical,
- *		and we shouldn't slow down the hot paths to make it faster ...
+ *		If the expected maximum number of buffers to be dropped is small
+ *		enough, each buffer is located individually with BufTableLookup().
+ *		Otherwise, the buffer pool is scanned sequentially.  Since buffers must
+ *		not be left behind, the sequential scan is used unless the sizes of all
+ *		the involved forks are already cached.  See smgrnblocks() for details.
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
+	RelFileNodeBackend	rnode;
+	bool		cached = false;
+	BlockNumber	nForkBlocks[MAX_FORKNUM];
+	BlockNumber	nBlocksToInvalidate = 0;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
@@ -2992,6 +3006,35 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 		return;
 	}
 
+	/*
+	 * Get the number of blocks of each given fork and the total number of
+	 * blocks to be invalidated for the relation.  Give up the optimization
+	 * if the size of any fork is not cached.
+	 */
+	for (i = 0; i < nforks; i++)
+	{
+		/* Get the number of blocks for a relation's fork */
+		nForkBlocks[i] = smgrnblocks(smgr_reln, forkNum[i], &cached);
+
+		if (!cached)
+			break;
+
+		/* Get the number of blocks to be invalidated */
+		nBlocksToInvalidate += (nForkBlocks[i] - firstDelBlock[i]);
+	}
+
+	/*
+	 * Look up the buffers in the hash table and drop them if the size of
+	 * every fork is cached and the total number of blocks to be invalidated
+	 * is below the full scan threshold.  Otherwise, give up the optimization.
+	 */
+	if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
+	{
+		FindAndDropRelFileNodeBuffers(rnode.node, forkNum, nforks,
+									  nForkBlocks, firstDelBlock);
+		return;
+	}
+
 	for (i = 0; i < NBuffers; i++)
 	{
 		BufferDesc *bufHdr = GetBufferDescriptor(i);
@@ -3135,6 +3178,65 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 }
 
 /* ---------------------------------------------------------------------
+ *		FindAndDropRelFileNodeBuffers
+ *
+ *		This function finds and removes from the buffer pool all the pages
+ *		of the specified relation forks that have block numbers >= firstDelBlock.
+ *		(In particular, with firstDelBlock = 0, all pages are removed.)
+ *		This is only called in recovery, when the block counts of all forks are
+ *		cached and the total number of to-be-invalidated blocks of the relation
+ *		is below the threshold for a full buffer pool scan.
+ * --------------------------------------------------------------------
+ */
+static void
+FindAndDropRelFileNodeBuffers(RelFileNode rnode, ForkNumber *forkNum, int nforks,
+							  BlockNumber *nForkBlocks, BlockNumber *firstDelBlock)
+{
+	int		i;				/* index into the parallel per-fork arrays */
+
+	for (i = 0; i < nforks; i++)
+	{
+		BlockNumber		curBlock;	/* candidate block of fork forkNum[i] */
+
+		for (curBlock = firstDelBlock[i]; curBlock < nForkBlocks[i]; curBlock++)
+		{
+			uint32		bufHash;		/* hash code of bufTag */
+			BufferTag	bufTag;			/* tag identifying curBlock of this fork */
+			LWLock	   	*bufPartitionLock;	/* mapping partition lock for bufHash */
+			int		buf_id;			/* BufTableLookup() result, < 0 if absent */
+			BufferDesc	*bufHdr;		/* descriptor of the buffer that was found */
+			uint32		buf_state;		/* state returned by LockBufHdr() */
+
+			/* build the tag used to look this block up in the buffer table */
+			INIT_BUFFERTAG(bufTag, rnode, forkNum[i], curBlock);
+
+			/* determine its hash code and the partition lock covering it */
+			bufHash = BufTableHashCode(&bufTag);
+			bufPartitionLock = BufMappingPartitionLock(bufHash);
+
+			/* Look the block up while holding the partition lock in shared mode. */
+			LWLockAcquire(bufPartitionLock, LW_SHARED);
+			buf_id = BufTableLookup(&bufTag, bufHash);
+			LWLockRelease(bufPartitionLock);
+			/* Not in the buffer pool: nothing to drop for this block. */
+			if (buf_id < 0)
+				continue;
+
+			bufHdr = GetBufferDescriptor(buf_id);
+
+			buf_state = LockBufHdr(bufHdr);
+			/* recheck the tag, since the partition lock is no longer held */
+			if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+				bufHdr->tag.forkNum == forkNum[i] &&
+				bufHdr->tag.blockNum >= firstDelBlock[i])
+				InvalidateBuffer(bufHdr);	/* releases spinlock */
+			else
+				UnlockBufHdr(bufHdr, buf_state);
+		}
+	}
+}
+
+/* ---------------------------------------------------------------------
  *		DropDatabaseBuffers
  *
  *		This function removes all the buffers in the buffer cache for a
@@ -3246,8 +3348,7 @@ PrintPinnedBufs(void)
  *		XXX currently it sequentially searches the buffer pool, should be
  *		changed to more clever ways of searching.  This routine is not
  *		used in any performance-critical code paths, so it's not worth
- *		adding additional overhead to normal paths to make it go faster;
- *		but see also DropRelFileNodeBuffers.
+ *		adding additional overhead to normal paths to make it go faster.
  * --------------------------------------------------------------------
  */
 void
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index e9dffd2..9d3a67c 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -605,7 +605,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1

