From 4bb6343e231fdcb28b0b71dda9ed8f6550dbb7bd Mon Sep 17 00:00:00 2001
From: Kirk Jamison <k.jamison@jp.fujitsu.com>
Date: Wed, 2 Dec 2020 12:48:04 +0000
Subject: [PATCH v33 4/4] Optimize DropRelFileNodesAllBuffers() in recovery.

DropRelFileNodesAllBuffers() is optimized to skip the time-consuming
scan of the whole buffer pool during recovery when the relation is
small enough, or when the number of blocks to be invalidated is below
the full scan threshold. This improves the performance of
DropRelationFiles() when the TRUNCATE command has truncated off empty
pages at the end of a relation, and when relation buffers are dropped
while committing or rolling back a prepared transaction in
FinishPreparedTransaction().
---
 src/backend/storage/buffer/bufmgr.c | 85 ++++++++++++++++++++++++++++++++-----
 src/backend/storage/smgr/smgr.c     | 14 +++---
 src/include/storage/bufmgr.h        |  2 +-
 3 files changed, 83 insertions(+), 18 deletions(-)

diff --git a/src/backend/storage/buffer/bufmgr.c b/src/backend/storage/buffer/bufmgr.c
index c3ee6c6..8319208 100644
--- a/src/backend/storage/buffer/bufmgr.c
+++ b/src/backend/storage/buffer/bufmgr.c
@@ -3091,28 +3091,27 @@ DropRelFileNodeBuffers(SMgrRelation smgr_reln, ForkNumber *forkNum,
  * --------------------------------------------------------------------
  */
 void
-DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
+DropRelFileNodesAllBuffers(SMgrRelation *smgr_reln, int nnodes)
 {
-	int			i,
-				n = 0;
+	int			i;
+	int			j;
+	int			n = 0;
 	RelFileNode *nodes;
 	bool		use_bsearch;
 
 	if (nnodes == 0)
 		return;
 
-	nodes = palloc(sizeof(RelFileNode) * nnodes);	/* non-local relations */
-
 	/* If it's a local relation, it's localbuf.c's problem. */
 	for (i = 0; i < nnodes; i++)
 	{
-		if (RelFileNodeBackendIsTemp(rnodes[i]))
+		if (RelFileNodeBackendIsTemp(smgr_reln[i]->smgr_rnode))
 		{
-			if (rnodes[i].backend == MyBackendId)
-				DropRelFileNodeAllLocalBuffers(rnodes[i].node);
+			if (smgr_reln[i]->smgr_rnode.backend == MyBackendId)
+				DropRelFileNodeAllLocalBuffers(smgr_reln[i]->smgr_rnode.node);
 		}
 		else
-			nodes[n++] = rnodes[i].node;
+			n++;
 	}
 
 	/*
@@ -3120,11 +3119,77 @@ DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes)
 	 * memory and return.
 	 */
 	if (n == 0)
+		return;
+
+	/*
+	 * We attempt the optimization only when we are in recovery.  Otherwise,
+	 * we proceed to a full scan of the whole buffer pool.
+	 */
+	if (InRecovery)
 	{
-		pfree(nodes);
+		BlockNumber (*block)[MAX_FORKNUM + 1] = (BlockNumber (*)[MAX_FORKNUM + 1])
+												palloc(sizeof(BlockNumber) * n * (MAX_FORKNUM + 1));
+		BlockNumber	nBlocksToInvalidate = 0;
+
+		for (i = 0; i < n; i++)
+		{
+			for (j = 0; j <= MAX_FORKNUM; j++)
+			{
+				/*
+				 * Set the block count to InvalidBlockNumber if the relation
+				 * fork does not exist, so that we can skip it later
+				 * when dropping the relation buffers.
+				 */
+				if (!smgrexists(smgr_reln[i], j))
+				{
+					block[i][j] = InvalidBlockNumber;
+					continue;
+				}
+
+				/* Get the number of blocks for a relation's fork */
+				block[i][j] = smgrnblocks(smgr_reln[i], j, NULL);
+
+				nBlocksToInvalidate += block[i][j];
+			}
+		}
+
+		/*
+		 * Give up the optimization if the total number of blocks
+		 * to be invalidated for all relations is greater than or
+		 * equal to the full scan threshold.
+		 */
+		if (nBlocksToInvalidate >= BUF_DROP_FULL_SCAN_THRESHOLD)
+		{
+			pfree(block);
+			goto buffer_full_scan;
+		}
+
+		for (i = 0; i < n; i++)
+		{
+			/*
+			 * If block to drop is valid, drop the buffers of the fork.
+			 * Zero the firstDelBlock because all buffers will be
+			 * dropped anyway.
+			 */
+			for (j = 0; j <= MAX_FORKNUM; j++)
+			{
+				if (BlockNumberIsValid(block[i][j]))
+				{
+					FindAndDropRelFileNodeBuffers(smgr_reln[i]->smgr_rnode.node,
+												  j, block[i][j], 0);
+				}
+			}
+		}
+
+		pfree(block);
 		return;
 	}
 
+buffer_full_scan:
+	nodes = palloc(sizeof(RelFileNode) * n); /* non-local relations */
+	for (i = 0; i < n; i++)
+		nodes[i] = smgr_reln[i]->smgr_rnode.node;
+
 	/*
 	 * For low number of relations to drop just use a simple walk through, to
 	 * save the bsearch overhead. The threshold to use is rather a guess than
diff --git a/src/backend/storage/smgr/smgr.c b/src/backend/storage/smgr/smgr.c
index 9d3a67c..3663bb7 100644
--- a/src/backend/storage/smgr/smgr.c
+++ b/src/backend/storage/smgr/smgr.c
@@ -392,7 +392,13 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 		return;
 
 	/*
-	 * create an array which contains all relations to be dropped, and close
+	 * Get rid of any remaining buffers for the relations.  bufmgr will just
+	 * drop them without bothering to write the contents.
+	 */
+	DropRelFileNodesAllBuffers(rels, nrels);
+
+	/*
+	 * Create an array which contains all relations to be dropped, and close
 	 * each relation's forks at the smgr level while at it
 	 */
 	rnodes = palloc(sizeof(RelFileNodeBackend) * nrels);
@@ -409,12 +415,6 @@ smgrdounlinkall(SMgrRelation *rels, int nrels, bool isRedo)
 	}
 
 	/*
-	 * Get rid of any remaining buffers for the relations.  bufmgr will just
-	 * drop them without bothering to write the contents.
-	 */
-	DropRelFileNodesAllBuffers(rnodes, nrels);
-
-	/*
 	 * It'd be nice to tell the stats collector to forget them immediately,
 	 * too. But we can't because we don't know the OIDs.
 	 */
diff --git a/src/include/storage/bufmgr.h b/src/include/storage/bufmgr.h
index 056f65e..2e5189b 100644
--- a/src/include/storage/bufmgr.h
+++ b/src/include/storage/bufmgr.h
@@ -205,7 +205,7 @@ extern void FlushRelationsAllBuffers(struct SMgrRelationData **smgrs, int nrels)
 extern void FlushDatabaseBuffers(Oid dbid);
 extern void DropRelFileNodeBuffers(struct SMgrRelationData *smgr_reln, ForkNumber *forkNum,
 								   int nforks, BlockNumber *firstDelBlock);
-extern void DropRelFileNodesAllBuffers(RelFileNodeBackend *rnodes, int nnodes);
+extern void DropRelFileNodesAllBuffers(struct SMgrRelationData **smgr_reln, int nnodes);
 extern void DropDatabaseBuffers(Oid dbid);
 
 #define RelationGetNumberOfBlocks(reln) \
-- 
1.8.3.1

