From 7b72e34e931dca1c0c8ea77f182b33a739dc2eba Mon Sep 17 00:00:00 2001
From: Masahiko Sawada <sawada.mshk@gmail.com>
Date: Tue, 5 Jan 2021 09:47:49 +0900
Subject: [PATCH 3/3] Skip btree bulkdelete if the index doesn't grow.

In amvacuumstrategy, btree indexes return INDEX_VACUUM_STRATEGY_NONE
if the index hasn't grown since the last bulk-deletion. To remember that,
this change adds a new field to the btree meta page that stores the
number of index blocks at the time of the last bulk-deletion.

XXX: need to upgrade the meta page version.
---
 contrib/pageinspect/Makefile                  |  3 +-
 contrib/pageinspect/btreefuncs.c              |  5 +++
 contrib/pageinspect/pageinspect--1.8--1.9.sql | 22 +++++++++++++
 contrib/pageinspect/pageinspect.control       |  2 +-
 src/backend/access/nbtree/nbtpage.c           |  9 ++++-
 src/backend/access/nbtree/nbtree.c            | 33 ++++++++++++++++++-
 src/backend/access/nbtree/nbtxlog.c           |  1 +
 src/backend/access/rmgrdesc/nbtdesc.c         |  5 +--
 src/include/access/nbtree.h                   |  2 ++
 src/include/access/nbtxlog.h                  |  1 +
 10 files changed, 77 insertions(+), 6 deletions(-)
 create mode 100644 contrib/pageinspect/pageinspect--1.8--1.9.sql

diff --git a/contrib/pageinspect/Makefile b/contrib/pageinspect/Makefile
index d9d8177116..a0760afa4e 100644
--- a/contrib/pageinspect/Makefile
+++ b/contrib/pageinspect/Makefile
@@ -12,7 +12,8 @@ OBJS = \
 	rawpage.o
 
 EXTENSION = pageinspect
-DATA =  pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
+DATA = pageinspect--1.8--1.9.sql \
+	pageinspect--1.7--1.8.sql pageinspect--1.6--1.7.sql \
 	pageinspect--1.5.sql pageinspect--1.5--1.6.sql \
 	pageinspect--1.4--1.5.sql pageinspect--1.3--1.4.sql \
 	pageinspect--1.2--1.3.sql pageinspect--1.1--1.2.sql \
diff --git a/contrib/pageinspect/btreefuncs.c b/contrib/pageinspect/btreefuncs.c
index 445605db58..94f648118f 100644
--- a/contrib/pageinspect/btreefuncs.c
+++ b/contrib/pageinspect/btreefuncs.c
@@ -692,6 +692,11 @@ bt_metap(PG_FUNCTION_ARGS)
 		values[j++] = "f";
 	}
 
+	if (metad->btm_version >= BTREE_VERSION)
+		values[j++] = psprintf(INT64_FORMAT, (int64) metad->btm_last_deletion_nblocks);
+	else
+		values[j++] = "-1";
+
 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
 								   values);
 
diff --git a/contrib/pageinspect/pageinspect--1.8--1.9.sql b/contrib/pageinspect/pageinspect--1.8--1.9.sql
new file mode 100644
index 0000000000..bd1752cf35
--- /dev/null
+++ b/contrib/pageinspect/pageinspect--1.8--1.9.sql
@@ -0,0 +1,22 @@
+/* contrib/pageinspect/pageinspect--1.8--1.9.sql */
+
+-- complain if script is sourced in psql, rather than via ALTER EXTENSION
+\echo Use "ALTER EXTENSION pageinspect UPDATE TO '1.9'" to load this file. \quit
+
+--
+-- bt_metap()
+--
+DROP FUNCTION bt_metap(text);
+CREATE FUNCTION bt_metap(IN relname text,
+    OUT magic int4,
+    OUT version int4,
+    OUT root int8,
+    OUT level int8,
+    OUT fastroot int8,
+    OUT fastlevel int8,
+    OUT oldest_xact xid,
+    OUT last_cleanup_num_tuples float8,
+    OUT allequalimage boolean,
+    OUT last_deletion_nblocks int8)
+AS 'MODULE_PATHNAME', 'bt_metap'
+LANGUAGE C STRICT PARALLEL SAFE;
diff --git a/contrib/pageinspect/pageinspect.control b/contrib/pageinspect/pageinspect.control
index f8cdf526c6..bd716769a1 100644
--- a/contrib/pageinspect/pageinspect.control
+++ b/contrib/pageinspect/pageinspect.control
@@ -1,5 +1,5 @@
 # pageinspect extension
 comment = 'inspect the contents of database pages at a low level'
-default_version = '1.8'
+default_version = '1.9'
 module_pathname = '$libdir/pageinspect'
 relocatable = true
diff --git a/src/backend/access/nbtree/nbtpage.c b/src/backend/access/nbtree/nbtpage.c
index 89eb66a8a6..eac78d3b7e 100644
--- a/src/backend/access/nbtree/nbtpage.c
+++ b/src/backend/access/nbtree/nbtpage.c
@@ -76,6 +76,7 @@ _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level,
 	metad->btm_oldest_btpo_xact = InvalidTransactionId;
 	metad->btm_last_cleanup_num_heap_tuples = -1.0;
 	metad->btm_allequalimage = allequalimage;
+	metad->btm_last_deletion_nblocks = InvalidBlockNumber;
 
 	metaopaque = (BTPageOpaque) PageGetSpecialPointer(page);
 	metaopaque->btpo_flags = BTP_META;
@@ -115,6 +116,7 @@ _bt_upgrademetapage(Page page)
 	metad->btm_version = BTREE_NOVAC_VERSION;
 	metad->btm_oldest_btpo_xact = InvalidTransactionId;
 	metad->btm_last_cleanup_num_heap_tuples = -1.0;
+
 	/* Only a REINDEX can set this field */
 	Assert(!metad->btm_allequalimage);
 	metad->btm_allequalimage = false;
@@ -179,17 +181,20 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 	BTMetaPageData *metad;
 	bool		needsRewrite = false;
 	XLogRecPtr	recptr;
+	BlockNumber nblocks;
 
 	/* read the metapage and check if it needs rewrite */
 	metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
 	metapg = BufferGetPage(metabuf);
 	metad = BTPageGetMeta(metapg);
+	nblocks = RelationGetNumberOfBlocks(rel);
 
 	/* outdated version of metapage always needs rewrite */
 	if (metad->btm_version < BTREE_NOVAC_VERSION)
 		needsRewrite = true;
 	else if (metad->btm_oldest_btpo_xact != oldestBtpoXact ||
-			 metad->btm_last_cleanup_num_heap_tuples != numHeapTuples)
+			 metad->btm_last_cleanup_num_heap_tuples != numHeapTuples ||
+			 metad->btm_last_deletion_nblocks != nblocks)
 		needsRewrite = true;
 
 	if (!needsRewrite)
@@ -211,6 +216,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 	/* update cleanup-related information */
 	metad->btm_oldest_btpo_xact = oldestBtpoXact;
 	metad->btm_last_cleanup_num_heap_tuples = numHeapTuples;
+	metad->btm_last_deletion_nblocks = nblocks;
 	MarkBufferDirty(metabuf);
 
 	/* write wal record if needed */
@@ -230,6 +236,7 @@ _bt_update_meta_cleanup_info(Relation rel, TransactionId oldestBtpoXact,
 		md.oldest_btpo_xact = oldestBtpoXact;
 		md.last_cleanup_num_heap_tuples = numHeapTuples;
 		md.allequalimage = metad->btm_allequalimage;
+		md.last_deletion_nblocks = metad->btm_last_deletion_nblocks;
 
 		XLogRegisterBufData(0, (char *) &md, sizeof(xl_btree_metadata));
 
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index c9a177d5e1..7409c23a5c 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -882,7 +882,38 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
 IndexVacuumStrategy
 btvacuumstrategy(IndexVacuumInfo *info)
 {
-	return INDEX_VACUUM_STRATEGY_BULKDELETE;
+	Buffer		metabuf;
+	Page		metapg;
+	BTMetaPageData *metad;
+	IndexVacuumStrategy result = INDEX_VACUUM_STRATEGY_NONE;
+
+	metabuf = _bt_getbuf(info->index, BTREE_METAPAGE, BT_READ);
+	metapg = BufferGetPage(metabuf);
+	metad = BTPageGetMeta(metapg);
+
+	if (metad->btm_version < BTREE_VERSION)
+	{
+		/*
+		 * The metapage is older than BTREE_VERSION and needs an upgrade, so
+		 * the stored block count is not available yet: do bulk-deletion.
+		 */
+		result = INDEX_VACUUM_STRATEGY_BULKDELETE;
+	}
+	else
+	{
+		BlockNumber	nblocks = RelationGetNumberOfBlocks(info->index);
+
+		/*
+		 * Do bulk-deletion if no block count has been recorded yet, or if
+		 * the index now has more blocks than the recorded count.
+		 */
+		if (!BlockNumberIsValid(metad->btm_last_deletion_nblocks) ||
+			 nblocks > metad->btm_last_deletion_nblocks)
+			result = INDEX_VACUUM_STRATEGY_BULKDELETE;
+	}
+
+	_bt_relbuf(info->index, metabuf);
+	return result;
 }
 
 /*
diff --git a/src/backend/access/nbtree/nbtxlog.c b/src/backend/access/nbtree/nbtxlog.c
index 45313d924c..65e537211c 100644
--- a/src/backend/access/nbtree/nbtxlog.c
+++ b/src/backend/access/nbtree/nbtxlog.c
@@ -115,6 +115,7 @@ _bt_restore_meta(XLogReaderState *record, uint8 block_id)
 	md->btm_oldest_btpo_xact = xlrec->oldest_btpo_xact;
 	md->btm_last_cleanup_num_heap_tuples = xlrec->last_cleanup_num_heap_tuples;
 	md->btm_allequalimage = xlrec->allequalimage;
+	md->btm_last_deletion_nblocks = xlrec->last_deletion_nblocks;
 
 	pageop = (BTPageOpaque) PageGetSpecialPointer(metapg);
 	pageop->btpo_flags = BTP_META;
diff --git a/src/backend/access/rmgrdesc/nbtdesc.c b/src/backend/access/rmgrdesc/nbtdesc.c
index 4c4af9fce0..462838682e 100644
--- a/src/backend/access/rmgrdesc/nbtdesc.c
+++ b/src/backend/access/rmgrdesc/nbtdesc.c
@@ -110,9 +110,10 @@ btree_desc(StringInfo buf, XLogReaderState *record)
 
 				xlrec = (xl_btree_metadata *) XLogRecGetBlockData(record, 0,
 																  NULL);
-				appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f",
+				appendStringInfo(buf, "oldest_btpo_xact %u; last_cleanup_num_heap_tuples: %f; last_deletion_nblocks: %u",
 								 xlrec->oldest_btpo_xact,
-								 xlrec->last_cleanup_num_heap_tuples);
+								 xlrec->last_cleanup_num_heap_tuples,
+								 xlrec->last_deletion_nblocks);
 				break;
 			}
 	}
diff --git a/src/include/access/nbtree.h b/src/include/access/nbtree.h
index b8247537fd..a56baea310 100644
--- a/src/include/access/nbtree.h
+++ b/src/include/access/nbtree.h
@@ -109,6 +109,8 @@ typedef struct BTMetaPageData
 	float8		btm_last_cleanup_num_heap_tuples;	/* number of heap tuples
 													 * during last cleanup */
 	bool		btm_allequalimage;	/* are all columns "equalimage"? */
+	BlockNumber	btm_last_deletion_nblocks;	/* number of blocks during last
+											 * bulk-deletion */
 } BTMetaPageData;
 
 #define BTPageGetMeta(p) \
diff --git a/src/include/access/nbtxlog.h b/src/include/access/nbtxlog.h
index f5d3e9f5e0..45f01a3dc9 100644
--- a/src/include/access/nbtxlog.h
+++ b/src/include/access/nbtxlog.h
@@ -55,6 +55,7 @@ typedef struct xl_btree_metadata
 	TransactionId oldest_btpo_xact;
 	float8		last_cleanup_num_heap_tuples;
 	bool		allequalimage;
+	BlockNumber last_deletion_nblocks;
 } xl_btree_metadata;
 
 /*
-- 
2.27.0

