From 9cad80a4a321c88057de82913d3a0ce700982616 Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Wed, 2 Dec 2020 12:48:04 +0000
Subject: [PATCH v35 4/4] Optimize DropRelFileNodesAllBuffers() in recovery.

DropRelFileNodesAllBuffers() is optimized to skip the time-consuming
scan of the whole buffer pool during recovery when the relations are
small enough, or when the number of blocks to be invalidated is below
the full scan threshold. This improves the performance of
DropRelationFiles(), for example, when a TRUNCATE command truncates
off empty pages at the end of a relation.

We first check whether all the relations are candidates for
optimization during recovery, by ensuring that a cached size was
returned by smgrnblocks() for each relation fork.  Similar to
DropRelFileNodeBuffers(), we invalidate buffer blocks by locating them
with BufTableLookup() when we know for certain up to what page of
every fork we could possibly have a buffer.  That can be checked
through the "cached" flag returned by smgrnblocks(), which currently
is true only during recovery.  Otherwise, if the size of a particular
relation fork is not "cached", which can happen if there were no
updates to that existing fork during recovery, we fall back to a full
scan of the whole buffer pool.
---
 src/backend/storage/buffer/bufmgr.c | 86 ++++++++++++++++++++++++++++++++-----
 src/backend/storage/smgr/smgr.c     | 14 +++---
 src/include/storage/bufmgr.h        |  2 +-
 3 files changed, 84 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index c3ee6c6..ba5acaf 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3091,28 +3091,30 @@ DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
+DropRelFileNodesAllBuffers(SMgrRelation *smgr_reln, int nnodes)
 {
-	int			i,
-				n = 0;
+	int			i;
+	int			j;
+	int			n = 0;
+	BlockNumber (*block)[MAX_FORKNUM + 1];
+	BlockNumber	nBlocksToInvalidate = 0;
 	RelFileNode *nodes;
+	bool		cached = false;
 	bool		use_bsearch;
 
 	if (nnodes == 0)
 		return;
 
-	nodes = palloc(sizeof(RelFileNode) * nnodes);	/* non-local relations */
-
 	/* If it's a local relation, it's localbuf.c's problem. */
 	for (i = 0; i < nnodes; i++)
 	{
-		if (RelFileNodeBackendIsTemp(rnodes[i]))
+		if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
 		{
-			if (rnodes[i].backend == MyBackendId)
-				DropRelFileNodeAllLocalBuffers(rnodes[i].node);
+			if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
+				DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
 		}
 		else
-			nodes[n++] = rnodes[i].node;
+			n++;
 	}
 
 	/*
@@ -3120,11 +3122,75 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	 * memory and return.
 	 */
 	if (n == 0)
+		return;
+
+	block = (BlockNumber (*)[MAX_FORKNUM + 1])
+			palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
+
+	/*
+	 * Check if all the relations are candidates for buffer drop
+	 * optimization.  Otherwise, we proceed to full scan of the
+	 * whole buffer pool if a size of a particular relation fork
+	 * is not cached, which can happen if there are no updates
+	 * to that fork during recovery.
+	 */
+	for (i = 0; i < n; i++)
 	{
-		pfree(nodes);
+		for (j = 0; j <= MAX_FORKNUM; j++)
+		{
+			/*
+			 * Assign InvalidBlockNumber to a block if a relation
+			 * fork does not exist, so that we can skip it later
+			 * when dropping the relation buffers.
+			 */
+			if (!smgrexists(smgr_reln[i], j))
+			{
+				block[i][j] = InvalidBlockNumber;
+				continue;
+			}
+
+			/* Get the number of blocks for a relation's fork */
+			block[i][j] = smgrnblocks(smgr_reln[i], j, &cached);
+
+			if (!cached)
+				goto buffer_full_scan;
+
+			nBlocksToInvalidate += block[i][j];
+		}
+	}
+
+	/*
+	 * Enter the optimization if the total number of blocks to be
+	 * invalidated for all relations is below the full scan threshold.
+	 */
+	if (nBlocksToInvalidate < BUF_DROP_FULL_SCAN_THRESHOLD)
+	{
+		for (i = 0; i < n; i++)
+		{
+			/*
+			 * If block to drop is valid, drop the buffers of the fork.
+			 * Zero the firstDelBlock because all buffers will be
+			 * dropped anyway.
+			 */
+			for (j = 0; j <= MAX_FORKNUM; j++)
+			{
+				if (!BlockNumberIsValid(block[i][j]))
+					continue;
+
+				FindAndDropRelFileNodeBuffers(smgr_reln[i]->smgr_rnode.node,
+											  j, block[i][j], 0);
+			}
+		}
+		pfree(block);
 		return;
 	}
 
+buffer_full_scan:
+	pfree(block);
+	nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
+	for (i = 0; i < n; i++)
+		nodes[i] = smgr_reln[i]->smgr_rnode.node;
+
 	/*
 	 * For low number of relations to drop just use a simple walk through, to
 	 * save the bsearch overhead. The threshold to use is rather a guess than
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 9d3a67c..3663bb7 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -392,7 +392,13 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 		return;
 
 	/*
-	 * create an array which contains all relations to be dropped, and close
+	 * Get rid of any remaining buffers for the relations.  bufmgr will just
+	 * drop them without bothering to write the contents.
+	 */
+	DropRelFileNodesAllBuffers(rels, nrels);
+
+	/*
+	 * Create an array which contains all relations to be dropped, and close
 	 * each relation's forks at the smgr level while at it
 	 */
 	rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
@@ -409,12 +415,6 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	}
 
 	/*
-	 * Get rid of any remaining buffers for the relations.  bufmgr will just
-	 * drop them without bothering to write the contents.
-	 */
-	DropRelFileNodesAllBuffers(rnodes, nrels);
-
-	/*
 	 * It'd be nice to tell the stats collector to forget them immediately,
 	 * too. But we can't because we don't know the OIDs.
 	 */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 056f65e..2e5189b 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -205,7 +205,7 @@ extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels)
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
-extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
+extern void DropRelFileNodesAllBuffers(struct SMgrRelationData **smgr_reln, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
-- 
1.8.3.1

