From 5ae5dde505ded1f555324382f9db6e7fbd114492 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Wed, 23 Dec 2020 20:42:53 -0800
Subject: [PATCH 7/8] btvacuumstrategy() bottom-up index deletion changes

---
 src/backend/access/heap/vacuumlazy.c | 69 +++++++++++++++++++++++++---
 src/backend/access/nbtree/nbtree.c   | 35 ++++++++++++--
 src/backend/commands/vacuum.c        |  6 +++
 3 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 93c4488e39..c45c49d561 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -358,11 +358,13 @@ static void lazy_scan_heap(Relation onerel, VacuumParams *params,
 						   LVRelStats *vacrelstats, Relation *Irel, int nindexes,
 						   bool aggressive);
 static void choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params,
-								   Relation *Irel, int nindexes);
+								   Relation *Irel, int nindexes, double live_tuples,
+								   int maxdeadpage);
 static void lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params,
 										  LVRelStats *vacrelstats, Relation *Irel,
 										  int nindexes, IndexBulkDeleteResult **stats,
-										  LVParallelState *lps);
+										  LVParallelState *lps, double live_tuples,
+										  int maxdeadpage);
 static void lazy_vacuum_heap(Relation onerel, LVRelStats *vacrelstats);
 static bool lazy_check_needs_freeze(Buffer buf, bool *hastup,
 									LVRelStats *vacrelstats);
@@ -781,6 +783,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	BlockNumber empty_pages,
 				vacuumed_pages,
 				next_fsm_block_to_vacuum;
+	int			maxdeadpage = 0; /* controls whether we skip the heap vacuum scan */
 	double		num_tuples,		/* total number of nonremovable tuples */
 				live_tuples,	/* live tuples (reltuples estimate) */
 				tups_vacuumed,	/* tuples cleaned up by vacuum */
@@ -1080,7 +1083,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 			/* Vacuum the table and its indexes */
 			lazy_vacuum_table_and_indexes(onerel, params, vacrelstats,
 										  Irel, nindexes, indstats,
-										  lps);
+										  lps, live_tuples, maxdeadpage);
 
 			/*
 			 * Vacuum the Free Space Map to make newly-freed space visible on
@@ -1666,6 +1669,9 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 		 */
 		if (dead_tuples->num_tuples == prev_dead_count)
 			RecordPageWithFreeSpace(onerel, blkno, freespace);
+		else
+			maxdeadpage = Max(maxdeadpage,
+							  dead_tuples->num_tuples - prev_dead_count);
 	}
 
 	/* report that everything is scanned and vacuumed */
@@ -1707,7 +1713,7 @@ lazy_scan_heap(Relation onerel, VacuumParams *params, LVRelStats *vacrelstats,
 	if (dead_tuples->num_tuples > 0)
 		lazy_vacuum_table_and_indexes(onerel, params, vacrelstats,
 									  Irel, nindexes, indstats,
-									  lps);
+									  lps, live_tuples, maxdeadpage);
 
 	/*
 	 * Vacuum the remainder of the Free Space Map.  We must do this whether or
@@ -1780,14 +1786,16 @@ static void
 lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params,
 							  LVRelStats *vacrelstats, Relation *Irel,
 							  int nindexes, IndexBulkDeleteResult **indstats,
-							  LVParallelState *lps)
+							  LVParallelState *lps, double live_tuples,
+							  int maxdeadpage)
 {
 	/*
 	 * Choose the vacuum strategy for this vacuum cycle.
 	 * choose_vacuum_strategy will set the decision to
 	 * vacrelstats->vacuum_heap.
 	 */
-	choose_vacuum_strategy(vacrelstats, params, Irel, nindexes);
+	choose_vacuum_strategy(vacrelstats, params, Irel, nindexes, live_tuples,
+						   maxdeadpage);
 
 	/* Work on all the indexes, then the heap */
 	lazy_vacuum_all_indexes(onerel, Irel, indstats, vacrelstats, lps,
@@ -1825,7 +1833,8 @@ lazy_vacuum_table_and_indexes(Relation onerel, VacuumParams *params,
  */
 static void
 choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params,
-					   Relation *Irel, int nindexes)
+					   Relation *Irel, int nindexes, double live_tuples,
+					   int maxdeadpage)
 {
 	bool vacuum_heap = true;
 
@@ -1865,6 +1874,52 @@ choose_vacuum_strategy(LVRelStats *vacrelstats, VacuumParams *params,
 				break;
 			}
 		}
+
+		/*
+		 * XXX: This 130 test is for the maximum number of LP_DEAD items on
+		 * any one heap page encountered during heap scan by caller.  The
+		 * general idea here is to preserve the original pristine state of the
+		 * table when it is subject to constant non-HOT updates and heap fill
+		 * factor is reduced from its default.
+		 *
+		 * If we do this right (and with bottom-up index deletion), the
+		 * overall effect for non-HOT-update heavy workloads is that both
+		 * table and indexes (or at least a subset of indexes on the table
+		 * that are never logically modified by the updates) never grow even
+		 * by one block.  We can actually make those things perfectly stable
+		 * over time in the absence of queries that hold open MVCC snapshots
+		 * for a long time.  Stability is perhaps the most important thing
+		 * here (not performance per se).
+		 *
+		 * The exact number used here (130) is based on the assumption that
+		 * heap fillfactor is set to 90 in this table -- we can fit roughly
+		 * 200 "extra" LP_DEAD items on heap pages before they start to
+		 * "overflow" with that setting (e.g. before a pgbench_accounts table
+		 * that is subject to constant non-HOT updates needs to allocate new
+		 * pages just for new versions).  We're trying to avoid having VACUUM
+		 * call lazy_vacuum_heap() in most cases, but we don't want to be too
+		 * aggressive: it would be risky to make the value we test for much
+		 * higher/closer to ~200, since it might be too late by the time we
+		 * actually call lazy_vacuum_heap().  (Unsure of this, but that's the
+		 * idea, at least.)
+		 *
+		 * Since we're mostly worried about stability over time here, we have
+		 * to be worried about "small" effects.  If there are just a few heap
+		 * page overflows in each VACUUM cycle, that still means that heap
+		 * page overflows are _possible_.  It is perhaps only a matter of time
+		 * until the heap becomes almost as fragmented as it would with a heap
+		 * fill factor of 100 -- so "small" effects may be really important.
+		 * (Just guessing here, but I can say for sure that the bottom-up
+		 * deletion patch works that way, so it is an "educated guess".)
+		 */
+		if (!vacuum_heap)
+		{
+			if (maxdeadpage > 130 ||
+				/* Also check if maintenance_work_mem space is running out */
+				vacrelstats->dead_tuples->num_tuples >
+				vacrelstats->dead_tuples->max_tuples / 2)
+				vacuum_heap = true;
+		}
 	}
 
 	vacrelstats->vacuum_heap = vacuum_heap;
diff --git a/src/backend/access/nbtree/nbtree.c b/src/backend/access/nbtree/nbtree.c
index 420457c1a2..ee071cb463 100644
--- a/src/backend/access/nbtree/nbtree.c
+++ b/src/backend/access/nbtree/nbtree.c
@@ -878,12 +878,35 @@ _bt_vacuum_needs_cleanup(IndexVacuumInfo *info)
 }
 
 /*
- * Choose the vacuum strategy. Currently always do ambulkdelete.
+ * Choose the vacuum strategy
  */
 IndexVacuumStrategy
 btvacuumstrategy(IndexVacuumInfo *info)
 {
-	return INDEX_VACUUM_BULKDELETE;
+	Relation	rel = info->index;
+
+	/*
+	 * This strcmp() is a quick and dirty prototype of logic that decides
+	 * whether or not the index needs to get a bulk deletion during this
+	 * VACUUM.  A real version of this logic could work by remembering the
+	 * size of the index during the last VACUUM.  It would only return
+	 * INDEX_VACUUM_BULKDELETE to choose_vacuum_strategy()/vacuumlazy.c iff it
+	 * found that the index is now larger than it was last time around, even
+	 * by one single block.  (It could get a lot more sophisticated than that,
+	 * for example by trying to understand UPDATEs vs DELETEs, but a very
+	 * simple approach is probably almost as useful to users.)
+	 *
+	 * Further details on the strcmp() and my benchmarking:
+	 *
+	 * The index named abalance_ruin is the only index that receives logical
+	 * changes in my pgbench benchmarks.  It is one index among several on
+	 * pgbench_accounts.  It covers the abalance column, which makes almost
+	 * 100% of all UPDATEs non-HOT UPDATEs.
+	 */
+	if (strcmp(RelationGetRelationName(rel), "abalance_ruin") == 0)
+		return INDEX_VACUUM_BULKDELETE;
+
+	return INDEX_VACUUM_NONE;
 }
 
 /*
@@ -903,8 +926,14 @@ btbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
 	/*
 	 * Skip deleting index entries if the corresponding heap tuples will
 	 * not be deleted.
+	 *
+	 * XXX: Maybe we need to remember the decision made in btvacuumstrategy()
+	 * in an AM-generic way, or using some standard idiom that is owned by the
+	 * index AM?  The strcmp() here repeats work done in btvacuumstrategy(),
+	 * which is not ideal.
 	 */
-	if (info->bulkdelete_skippable)
+	if (info->bulkdelete_skippable &&
+		strcmp(RelationGetRelationName(rel), "abalance_ruin") != 0)
 		return NULL;
 
 	/* allocate stats if first time through, else re-use existing struct */
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 6a182ba9cd..223b7cb820 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1875,6 +1875,11 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 	 */
 	if (params->index_cleanup == VACOPT_TERNARY_DEFAULT)
 	{
+		/*
+		 * XXX had to comment this out to get choose_vacuum_strategy() to do
+		 * the right thing
+		 */
+#if 0
 		if (onerel->rd_options != NULL)
 		{
 			if (((StdRdOptions *) onerel->rd_options)->vacuum_index_cleanup)
@@ -1882,6 +1887,7 @@ vacuum_rel(Oid relid, RangeVar *relation, VacuumParams *params)
 			else
 				params->index_cleanup = VACOPT_TERNARY_DISABLED;
 		}
+#endif
 	}
 
 	/* Set truncate option based on reloptions if not yet */
-- 
2.27.0

