From 551f645ec91ed721f1b9c79b79235472e59a3c4d Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 5 Jan 2021 09:47:49 +0900
Subject: [PATCH v2 3/3] PoC: skip btree bulkdelete if the index doesn't grow.

In amvacuumstrategy, btree indexes return INDEX_VACUUM_STRATEGY_NONE
if the index hasn't grown since the last bulk-deletion. To remember that,
this change adds a new field to the btree meta page that stores the
number of blocks at the time of the last bulk-deletion.

XXX: need to upgrade the meta page version.
---
 contrib/pageinspect/btreefuncs.c              |  5 +++
 contrib/pageinspect/expected/btree.out        |  3 +-
 contrib/pageinspect/pageinspect--1.8--1.9.sql | 18 +++++++++
 src/backend/access/nbtree/nbtpage.c           |  9 ++++-
 src/backend/access/nbtree/nbtree.c            | 40 +++++++++++++++++--
 src/backend/access/nbtree/nbtxlog.c           |  1 +
 src/backend/access/rmgrdesc/nbtdesc.c         |  5 ++-
 src/include/access/nbtree.h                   |  2 +
 src/include/access/nbtxlog.h                  |  1 +
 9 files changed, 77 insertions(+), 7 deletions(-)

diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index 445605db58..94f648118f 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -692,6 +692,11 @@ bt_metap(PG_FUNCTION_ARGS)
 		values[j++] = "f";
 	}
 
+	if (metad->btm_version >= BTREE_VERSION)
+		values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_last_deletion_nblocks);
+	else
+		values[j++] = "-1";
+
 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
 								   values);
 
diff --git a/contrib/pageinspect/expected/btree.out b/contrib/pageinspect/expected/btree.out
index 17bf0c5470..5362bcb475 100644
--- a/contrib/pageinspect/expected/btree.out
+++ b/contrib/pageinspect/expected/btree.out
@@ -3,7 +3,7 @@ INSERT INTO test1 VALUES (72057594037927937, 'text');
 CREATE INDEX test1_a_idx ON test1 USING btree (a);
 \x
 SELECT * FROM bt_metap('test1_a_idx');
--[ RECORD 1 ]-----------+-------
+-[ RECORD 1 ]-----------+-----------
 magic                   | 340322
 version                 | 4
 root                    | 1
@@ -13,6 +13,7 @@ fastlevel               | 0
 oldest_xact             | 0
 last_cleanup_num_tuples | -1
 allequalimage           | t
+last_deletion_nblocks   | 4294967295
 
 SELECT * FROM bt_page_stats('test1_a_idx', 0);
 ERROR:  block 0 is a meta page
diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql
index 9dc342fabc..a87b74ce2a 100644
--- a/contrib/pageinspect/pageinspect--1.8--1.9.sql
+++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql
@@ -39,3 +39,21 @@ CREATE FUNCTION gist_page_items(IN page bytea,
 RETURNS SETOF record
 AS 'MODULE_PATHNAME', 'gist_page_items'
 LANGUAGE C STRICT PARALLEL SAFE;
+
+--
+-- bt_metap()
+--
+DROP FUNCTION bt_metap(text);
+CREATE FUNCTION bt_metap(IN relname text,
+    OUT magic int4,
+    OUT version int4,
+    OUT root int8,
+    OUT level int8,
+    OUT fastroot int8,
+    OUT fastlevel int8,
+    OUT oldest_xact xid,
+    OUT last_cleanup_num_tuples float8,
+    OUT allequalimage boolean,
+    OUT last_deletion_nblocks int8)
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index e230f912c2..d686f25a7a 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -82,6 +82,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level,
 	metad->btm_oldest_btpo_xact = InvalidTransactionId;
 	metad->btm_last_cleanup_num_heap_tuples = -1.0;
 	metad->btm_allequalimage = allequalimage;
+	metad->btm_last_deletion_nblocks = InvalidBlockNumber;
 
 	metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
 	metaopaque->btpo_flags = BTP_META;
@@ -121,6 +122,7 @@ _bt_upgrademetapage(Page page)
 	metad->btm_version = BTREE_NOVAC_VERSION;
 	metad->btm_oldest_btpo_xact = InvalidTransactionId;
 	metad->btm_last_cleanup_num_heap_tuples = -1.0;
+
 	/* Only a REINDEX can set this field */
 	Assert(!metad->btm_allequalimage);
 	metad->btm_allequalimage = false;
@@ -185,17 +187,20 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 	BTMetaPageData *metad;
 	bool		needsRewrite = false;
 	XLogRecPtr	recptr;
+	BlockNumber nblocks;
 
 	/* read the metapage and check if it needs rewrite */
 	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
 	metapg = BufferGetPage(metabuf);
 	metad = BTPageGetMeta(metapg);
+	nblocks = RelationGetNumberOfBlocks(rel);
 
 	/* outdated version of metapage always needs rewrite */
 	if (metad->btm_version < BTREE_NOVAC_VERSION)
 		needsRewrite = true;
 	else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
-			 metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
+			 metad->btm_last_cleanup_num_heap_tuples != numHeapTuples ||
+			 metad->btm_last_deletion_nblocks != nblocks)
 		needsRewrite = true;
 
 	if (!needsRewrite)
@@ -217,6 +222,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 	/* update cleanup-related information */
 	metad->btm_oldest_btpo_xact = oldestBtpoXact;
 	metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
+	metad->btm_last_deletion_nblocks = nblocks;
 	MarkBufferDirty(metabuf);
 
 	/* write wal record if needed */
@@ -236,6 +242,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 		md.oldest_btpo_xact = oldestBtpoXact;
 		md.last_cleanup_num_heap_tuples = numHeapTuples;
 		md.allequalimage = metad->btm_allequalimage;
+		md.last_deletion_nblocks = metad->btm_last_deletion_nblocks;
 
 		XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
 
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index e00e5fe0a4..56162cf41c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -878,16 +878,50 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
 }
 
 /*
- * Choose the vacuum strategy. Do bulk-deletion unless index cleanup
- * is specified to off.
+ * Choose the vacuum strategy: either do bulk-deletion or do nothing.
  */
 IndexVacuumStrategy
 btvacuumstrategy(IndexVacuumInfo *info, VacuumParams *params)
 {
+	Buffer		metabuf;
+	Page		metapg;
+	BTMetaPageData *metad;
+	IndexVacuumStrategy result = INDEX_VACUUM_STRATEGY_NONE;
+
+	/*
+	 * Skip bulk-deletion if index cleanup has been disabled by
+	 * user request.
+	 */
 	if (params->index_cleanup == VACOPT_TERNARY_DISABLED)
 		return INDEX_VACUUM_STRATEGY_NONE;
+
+	metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
+	metapg = BufferGetPage(metabuf);
+	metad = BTPageGetMeta(metapg);
+
+	if (metad->btm_version < BTREE_VERSION)
+	{
+		/*
+		 * Do bulk-deletion if metapage needs upgrade, because we don't
+		 * have meta-information yet.
+		 */
+		result = INDEX_VACUUM_STRATEGY_BULKDELETE;
+	}
 	else
-		return INDEX_VACUUM_STRATEGY_BULKDELETE;
+	{
+		BlockNumber	nblocks = RelationGetNumberOfBlocks(info->index);
+
+		/*
+		 * Do bulk-deletion if the index has grown since the last
+		 * deletion, even by one block, or if this is the first time.
+		 */
+		if (!BlockNumberIsValid(metad->btm_last_deletion_nblocks) ||
+			 nblocks > metad->btm_last_deletion_nblocks)
+			result = INDEX_VACUUM_STRATEGY_BULKDELETE;
+	}
+
+	_bt_relbuf(info->index, metabuf);
+	return result;
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index c1d578cc01..37546f566d 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -115,6 +115,7 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
 	md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
 	md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
 	md->btm_allequalimage = xlrec->allequalimage;
+	md->btm_last_deletion_nblocks = xlrec->last_deletion_nblocks;
 
 	pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
 	pageop->btpo_flags = BTP_META;
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index 6e0d6a2b72..4e58b0bc07 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -110,9 +110,10 @@ btree_desc(StringInfo buf, XLogReaderState *record)
 
 				xlrec = (xl_btree_metadata *) XLogRecGetBlockData(record, 0,
 																  NULL);
-				appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f",
+				appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f; last_deletion_nblocks: %u",
 								 xlrec->oldest_btpo_xact,
-								 xlrec->last_cleanup_num_heap_tuples);
+								 xlrec->last_cleanup_num_heap_tuples,
+								 xlrec->last_deletion_nblocks);
 				break;
 			}
 	}
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index ba120d4a80..f116e29735 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -110,6 +110,8 @@ typedef struct BTMetaPageData
 	float8		btm_last_cleanup_num_heap_tuples;	/* number of heap tuples
 													 * during last cleanup */
 	bool		btm_allequalimage;	/* are all columns "equalimage"? */
+	BlockNumber	btm_last_deletion_nblocks;	/* number of blocks during last
+											 * bulk-deletion */
 } BTMetaPageData;
 
 #define BTPageGetMeta(p) \
diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h
index 7ae5c98c2b..bc0c52a779 100644
--- a/src/include/access/nbtxlog.h
+++ b/src/include/access/nbtxlog.h
@@ -55,6 +55,7 @@ typedef struct xl_btree_metadata
 	TransactionId oldest_btpo_xact;
 	float8		last_cleanup_num_heap_tuples;
 	bool		allequalimage;
+	BlockNumber last_deletion_nblocks;
 } xl_btree_metadata;
 
 /*
-- 
2.27.0

