From 926f97c8ffc9cfe3237a44ec47854d5d3f7bc4a9 Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Tue, 10 Nov 2020 06:27:11 +0000
Subject: [PATCH v33 3/4] Optimize DropRelFileNodeBuffers() during recovery.

The recovery path of DropRelFileNodeBuffers() is optimized so that
scanning the whole buffer pool is avoided when the relation is small
enough, that is, when the total number of blocks to be invalidated is
below the threshold for a full scan.  This improves performance,
especially when VACUUM or autovacuum has truncated empty pages off the
end of a relation.
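
For illustration only (not part of the patch): the threshold scales
with the buffer pool, so with the default 8kB block size and
shared_buffers = 128MB (NBuffers = 16384) the optimized path is taken
when fewer than 16384 / 256 = 64 blocks have to be invalidated.  A
condensed sketch of the decision, using the names introduced later in
the patch:

    /* Sketch only; mirrors the logic added to DropRelFileNodeBuffers(). */
    #define BUF_DROP_FULL_SCAN_THRESHOLD    (uint32) (NBuffers / 256)

    if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
    {
        /* invalidate each to-be-truncated block individually via the
         * buffer mapping hash table (see the sketch further below) */
    }
    else
    {
        /* fall back to scanning all NBuffers buffer headers */
    }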

During recovery, when WAL records of XLOG_SMGR_TRUNCATE from VACUUM or
autovacuum are replayed, we must not leave behind any buffers for the
relation blocks being dropped.  So we invalidate buffer blocks by
locating them with BufTableLookup() when it is certain up to what page
of every fork we could possibly have a buffer.  We can know that from
the "cached" flag returned by smgrnblocks(), which currently becomes
true only during recovery.  It is safe to use the cached nblocks
because it is guaranteed to be the maximum page we have in shared
buffers during recovery, and that guarantee holds because we never ask
the file system for the relation size again once the value has been
cached.  Also, the cached nblocks will not be invalidated by file
extension during recovery.  See smgrnblocks() and smgrextend() for
more details.
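
A condensed sketch of the hash-table lookup described above; it is
boiled down from FindAndDropRelFileNodeBuffers() in the diff below,
where rnode, forkNum, curBlock and firstDelBlock have the same meaning
as in that function:

    BufferTag   tag;
    uint32      hash;
    LWLock     *partitionLock;
    int         buf_id;

    /* build the tag of the to-be-dropped block and find its partition */
    INIT_BUFFERTAG(tag, rnode, forkNum, curBlock);
    hash = BufTableHashCode(&tag);
    partitionLock = BufMappingPartitionLock(hash);

    /* probe the buffer mapping hash table instead of scanning NBuffers */
    LWLockAcquire(partitionLock, LW_SHARED);
    buf_id = BufTableLookup(&tag, hash);
    LWLockRelease(partitionLock);

    if (buf_id >= 0)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(buf_id);
        uint32      buf_state = LockBufHdr(bufHdr);

        /* re-check the tag: the buffer may have been evicted and reused
         * for another page between the lookup and taking the header lock */
        if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
            bufHdr->tag.forkNum == forkNum &&
            bufHdr->tag.blockNum >= firstDelBlock)
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr, buf_state);
    }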

If we are not in recovery, or the nblocks is not cached, we scan the
whole buffer pool to find and drop the buffers of the relation, which
is slower when only a small fraction of the buffers need to be
dropped.
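
For contrast, a simplified single-fork sketch of that unchanged
fallback path (only partially visible in the context lines of this
diff): every buffer header must be visited, so the cost is O(NBuffers)
no matter how few blocks actually belong to the relation.

    for (i = 0; i < NBuffers; i++)
    {
        BufferDesc *bufHdr = GetBufferDescriptor(i);
        uint32      buf_state;

        /* unlocked pre-check; possible false hits are re-checked below */
        if (!RelFileNodeEquals(bufHdr->tag.rnode, rnode.node))
            continue;

        buf_state = LockBufHdr(bufHdr);
        if (RelFileNodeEquals(bufHdr->tag.rnode, rnode.node) &&
            bufHdr->tag.forkNum == forkNum[0] &&
            bufHdr->tag.blockNum >= firstDelBlock[0])
            InvalidateBuffer(bufHdr);   /* releases spinlock */
        else
            UnlockBufHdr(bufHdr, buf_state);
    }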
---
 src/backend/storage/buffer/bufmgr.c | 115 +++++++++++++++++++++++++++++++++---
 src/backend/storage/smgr/smgr.c     |   2 +-
 src/include/storage/bufmgr.h        |   2 +-
 3 files changed, 110 insertions(+), 9 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index 1680bf4..c3ee6c6 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -70,6 +70,8 @@
 
 #define RELS_BSEARCH_THRESHOLD		20
 
+#define BUF_DROP_FULL_SCAN_THRESHOLD		(uint32)(NBuffers / 256)
+
 typedef struct PrivateRefCountEntry
 {
 	Buffer		buffer;
@@ -473,6 +475,10 @@ static BufferDesc *BufferAlloc(SMgrRelation smgr,
 							   BufferAccessStrategy strategy,
 							   bool *foundPtr);
 static void FlushBuffer(BufferDesc *buf, SMgrRelation reln);
+static void FindAndDropRelFileNodeBuffers(RelFileNode rnode,
+										  ForkNumber forkNum,
+										  BlockNumber nForkBlock,
+										  BlockNumber firstDelBlock);
 static void AtProcExit_Buffers(int code, Datum arg);
 static void CheckForBufferLeaks(void);
 static int	rnode_comparator(const void *p1, const void *p2);
@@ -2967,18 +2973,28 @@ BufferGetLSNAtomic(Buffer buffer)
  *		that no other process could be trying to load more pages of the
  *		relation into buffers.
  *
- *		XXX currently it sequentially searches the buffer pool, should be
- *		changed to more clever ways of searching.  However, this routine
- *		is used only in code paths that aren't very performance-critical,
- *		and we shouldn't slow down the hot paths to make it faster ...
+ *		We must not leave behind any buffers for the relation blocks being
+ *		dropped.  We invalidate buffer blocks by locating them with
+ *		BufTableLookup() when it is certain up to what page of every fork
+ *		we could possibly have a buffer.  We can know that from the
+ *		"cached" flag returned by smgrnblocks(), which currently becomes
+ *		true only during recovery.  See smgrnblocks() and smgrextend().
+ *		Otherwise we scan the whole buffer pool to find buffers for the
+ *		relation, which is slower when only a few buffers must be dropped.
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
 					   int nforks, BlockNumber *firstDelBlock)
 {
 	int			i;
 	int			j;
+	RelFileNodeBackend	rnode;
+	bool		cached = false;
+	BlockNumber	nForkBlock[MAX_FORKNUM];
+	BlockNumber	nBlocksToInvalidate = 0;
+
+	rnode = smgr_reln->smgr_rnode;
 
 	/* If it's a local relation, it's localbuf.c's problem. */
 	if (RelFileNodeBackendIsTemp(rnode))
@@ -2992,6 +3008,38 @@ DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
 		return;
 	}
 
+	/*
+	 * Get the total number of to-be-invalidated blocks of a relation as
+	 * well as the total block count of each fork.  Give up the
+	 * optimization if the block count is not cached.
+	 */
+	for (i = 0; i < nforks; i++)
+	{
+		/* Get the number of blocks for a relation's fork */
+		nForkBlock[i] = smgrnblocks(smgr_reln, forkNum[i], &cached);
+
+		if (!cached)
+			break;
+
+		/* Get the number of blocks to be invalidated */
+		nBlocksToInvalidate += (nForkBlock[i] - firstDelBlock[i]);
+	}
+
+	/*
+	 * Look up the buffers in the hash table and drop them if the block count
+	 * is already cached and the total number of blocks to be invalidated is
+	 * below the full-scan threshold.  Otherwise, give up the optimization.
+	 */
+	if (cached && nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
+	{
+		for (j = 0; j < nforks; j++)
+		{
+			FindAndDropRelFileNodeBuffers(rnode.node, forkNum[j],
+										  nForkBlock[j], firstDelBlock[j]);
+		}
+		return;
+	}
+
 	for (i = 0; i < NBuffers; i++)
 	{
 		BufferDesc *bufHdr = GetBufferDescriptor(i);
@@ -3135,6 +3183,60 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 }
 
 /* ---------------------------------------------------------------------
+ *		FindAndDropRelFileNodeBuffers
+ *
+ *		This function finds and removes from the buffer pool all the pages
+ *		of the specified relation fork that have block numbers >= firstDelBlock.
+ *		(In particular, with firstDelBlock = 0, all pages are removed.)
+ *		This is only called in recovery, when the block count of every fork
+ *		is cached and the total number of to-be-invalidated blocks of the
+ *		involved relations does not exceed the threshold for a full scan.
+ * --------------------------------------------------------------------
+ */
+static void
+FindAndDropRelFileNodeBuffers(RelFileNode rnode, ForkNumber forkNum,
+							  BlockNumber nForkBlock, BlockNumber firstDelBlock)
+{
+	BlockNumber		curBlock;
+
+	for (curBlock = firstDelBlock; curBlock < nForkBlock; curBlock++)
+	{
+		uint32		bufHash;		/* hash value for tag */
+		BufferTag	bufTag;			/* identity of requested block */
+		LWLock	   	*bufPartitionLock;	/* buffer partition lock for it */
+		int		buf_id;
+		BufferDesc	*bufHdr;
+		uint32		buf_state;
+
+		/* create a tag so we can lookup the buffer */
+		INIT_BUFFERTAG(bufTag, rnode, forkNum, curBlock);
+
+		/* determine its hash code and partition lock ID */
+		bufHash = BufTableHashCode(&bufTag);
+		bufPartitionLock = BufMappingPartitionLock(bufHash);
+
+		/* Check that it is in the buffer pool. If not, do nothing. */
+		LWLockAcquire(bufPartitionLock, LW_SHARED);
+		buf_id = BufTableLookup(&bufTag, bufHash);
+		LWLockRelease(bufPartitionLock);
+
+		if (buf_id < 0)
+			continue;
+
+		bufHdr = GetBufferDescriptor(buf_id);
+
+		buf_state = LockBufHdr(bufHdr);
+
+		if (RelFileNodeEquals(bufHdr->tag.rnode, rnode) &&
+			bufHdr->tag.forkNum == forkNum &&
+			bufHdr->tag.blockNum >= firstDelBlock)
+			InvalidateBuffer(bufHdr);	/* releases spinlock */
+		else
+			UnlockBufHdr(bufHdr, buf_state);
+	}
+}
+
+/* ---------------------------------------------------------------------
  *		DropDatabaseBuffers
  *
  *		This function removes all the buffers in the buffer cache for a
@@ -3246,8 +3348,7 @@ PrintPinnedBufs(void)
  *		XXX currently it sequentially searches the buffer pool, should be
  *		changed to more clever ways of searching.  This routine is not
  *		used in any performance-critical code paths, so it's not worth
- *		adding additional overhead to normal paths to make it go faster;
- *		but see also DropRelFileNodeBuffers.
+ *		adding additional overhead to normal paths to make it go faster.
  * --------------------------------------------------------------------
  */
 void
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index e9dffd2..9d3a67c 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -605,7 +605,7 @@ smgrtruncate(SMgrRelation reln, ForkNumber *forknum, int nforks, BlockNumber *nb
 	 * Get rid of any buffers for the about-to-be-deleted blocks. bufmgr will
 	 * just drop them without bothering to write the contents.
 	 */
-	DropRelFileNodeBuffers(reln->smgr_rnode, forknum, nforks, nblocks);
+	DropRelFileNodeBuffers(reln, forknum, nforks, nblocks);
 
 	/*
 	 * Send a shared-inval message to force other backends to close any smgr
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index ee91b8f..056f65e 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -203,7 +203,7 @@ extern void FlushOneBuffer(Buffer buffer);
 extern void FlushRelationBuffers(Relation rel);
 extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels);
 extern void FlushDatabaseBuffers(Oid dbid);
-extern void DropRelFileNodeBuffers(RelFileNodeBackend rnode, ForkNumber *forkNum,
+extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
 extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
-- 
1.8.3.1

