From f7fb9d54c1a663cded37c0186ed79514bd133b6f Mon Sep 17 00:00:00 2001
From: Andrey Borodin <amborodin@acm.org>
Date: Sat, 19 Oct 2024 12:46:29 +0500
Subject: [PATCH] Prototype B-tree vacuum streamlining

---
 src/backend/access/nbtree/nbtree.c | 66 ++++++++++++++++++++++++------
 1 file changed, 54 insertions(+), 12 deletions(-)

diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 56e502c4fc..5784d97f8f 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -86,7 +86,7 @@ typedef struct BTParallelScanDescData *BTParallelScanDesc;
 static void btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 						 IndexBulkDeleteCallback callback, void *callback_state,
 						 BTCycleId cycleid);
-static void btvacuumpage(BTVacState *vstate, BlockNumber scanblkno);
+static void btvacuumpage(BTVacState *vstate, Buffer buf);
 static BTVacuumPosting btreevacuumposting(BTVacState *vstate,
 										  IndexTuple posting,
 										  OffsetNumber updatedoffset,
@@ -1013,6 +1013,45 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	needLock = !RELATION_IS_LOCAL(rel);
 
 	scanblkno = BTREE_METAPAGE + 1;
+
+	{
+		/* Use a read stream to read most of the index data */
+		BlockRangeReadStreamPrivate p;
+		ReadStream *stream = NULL;
+		p.current_blocknum = scanblkno;
+		if (needLock)
+			LockRelationForExtension(rel, ExclusiveLock);
+		/* We only stream the blocks that are known to exist at the beginning */
+		p.last_exclusive = RelationGetNumberOfBlocks(rel);
+		if (needLock)
+			UnlockRelationForExtension(rel, ExclusiveLock);
+		stream = read_stream_begin_relation(READ_STREAM_FULL,
+											info->strategy,
+											rel,
+											MAIN_FORKNUM,
+											block_range_read_stream_cb,
+											&p,
+											0);
+		for (; scanblkno < p.last_exclusive; scanblkno++)
+		{
+			Buffer buf = read_stream_next_buffer(stream, NULL);
+			/*
+			 * We expect that blocks are returned in order.
+			 * However, we do not depend on it much, and in the future this
+			 * expectation might change.
+			 * Currently, this expectation only matters for progress reporting.
+			 */
+			Assert(BufferGetBlockNumber(buf) == scanblkno);
+			btvacuumpage(&vstate, buf);
+			if (info->report_progress)
+				pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
+											 scanblkno);
+		}
+		Assert(read_stream_next_buffer(stream, NULL) == InvalidBuffer);
+		read_stream_end(stream);
+	}
+
+	/* Now process any pages added to the index after the streamed scan began */
 	for (;;)
 	{
 		/* Get the current relation length */
@@ -1032,7 +1071,9 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 		/* Iterate over pages, then loop back to recheck length */
 		for (; scanblkno < num_pages; scanblkno++)
 		{
-			btvacuumpage(&vstate, scanblkno);
+			Buffer buf = ReadBufferExtended(rel, MAIN_FORKNUM, scanblkno, RBM_NORMAL,
+							 info->strategy);
+			btvacuumpage(&vstate, buf);
 			if (info->report_progress)
 				pgstat_progress_update_param(PROGRESS_SCAN_BLOCKS_DONE,
 											 scanblkno);
@@ -1069,7 +1110,7 @@ btvacuumscan(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
  * recycled (i.e. before the page split).
  */
 static void
-btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
+btvacuumpage(BTVacState *vstate, Buffer buf)
 {
 	IndexVacuumInfo *info = vstate->info;
 	IndexBulkDeleteResult *stats = vstate->stats;
@@ -1080,7 +1121,7 @@ btvacuumpage(BTVacState *vstate, BlockNumber scanblkno)
 	bool		attempt_pagedel;
 	BlockNumber blkno,
 				backtrack_to;
-	Buffer		buf;
+	BlockNumber scanblkno = BufferGetBlockNumber(buf);
 	Page		page;
 	BTPageOpaque opaque;
 
@@ -1094,14 +1135,6 @@ backtrack:
 	/* call vacuum_delay_point while not holding any buffer lock */
 	vacuum_delay_point();
 
-	/*
-	 * We can't use _bt_getbuf() here because it always applies
-	 * _bt_checkpage(), which will barf on an all-zero page. We want to
-	 * recycle all-zero pages, not fail.  Also, we want to use a nondefault
-	 * buffer access strategy.
-	 */
-	buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
-							 info->strategy);
 	_bt_lockbuf(rel, buf, BT_READ);
 	page = BufferGetPage(buf);
 	opaque = NULL;
@@ -1388,6 +1421,15 @@ backtrack:
 	if (backtrack_to != P_NONE)
 	{
 		blkno = backtrack_to;
+
+		/*
+		 * We can't use _bt_getbuf() here because it always applies
+		 * _bt_checkpage(), which will barf on an all-zero page. We want to
+		 * recycle all-zero pages, not fail.  Also, we want to use a nondefault
+		 * buffer access strategy.
+		 */
+		buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+								info->strategy);
 		goto backtrack;
 	}
 }
-- 
2.39.5 (Apple Git-154)

