From 4affd72e80405f6593866c21391789a5afe471f0 Mon Sep 17 00:00:00 2001
From: Peter Geoghegan <pg@bowt.ie>
Date: Sun, 21 Nov 2021 14:47:11 -0800
Subject: [PATCH v3 2/2] Improve log_autovacuum_min_duration output.

Add instrumentation of "missed dead tuples", and the number of pages
that had at least one such tuple.  These are fully DEAD (not just
RECENTLY_DEAD) tuples with storage that could not be pruned due to an
inability to acquire a cleanup lock.  This is a replacement for the
"skipped due to pin" instrumentation removed by the previous commit.
Note that the new instrumentation doesn't say anything about pages that
we failed to acquire a cleanup lock on when we see that there were no
missed dead tuples on the page.

Also report on visibility map pages skipped by VACUUM, without regard
for whether the pages were all-frozen or just all-visible.

Also report when and how relfrozenxid is advanced by VACUUM, including
non-aggressive VACUUM.  Apart from being useful on its own, this might
enable future work that teaches non-aggressive VACUUM to be more
concerned about advancing relfrozenxid sooner rather than later.

Also enhance how we report OldestXmin cutoff by putting it in context:
show how far behind it is at the _end_ of the VACUUM operation.

Deliberately don't do anything with VACUUM VERBOSE in this commit, since
a pending patch will generalize the log_autovacuum_min_duration code to
produce VACUUM VERBOSE output as well [1].  That'll get committed first.

[1] https://commitfest.postgresql.org/36/3431/
---
 src/include/commands/vacuum.h        |   2 +
 src/backend/access/heap/vacuumlazy.c | 100 +++++++++++++++++++--------
 src/backend/commands/analyze.c       |   3 +
 src/backend/commands/vacuum.c        |   9 +++
 4 files changed, 84 insertions(+), 30 deletions(-)

diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 4cfd52eaf..bc625463e 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -263,6 +263,8 @@ extern void vac_update_relstats(Relation relation,
 								bool hasindex,
 								TransactionId frozenxid,
 								MultiXactId minmulti,
+								bool *frozenxid_updated,
+								bool *minmulti_updated,
 								bool in_outer_xact);
 extern void vacuum_set_xid_limits(Relation rel,
 								  int freeze_min_age, int freeze_table_age,
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 1fb8735a2..56da89f04 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -330,6 +330,7 @@ typedef struct LVRelState
 	BlockNumber frozenskipped_pages;	/* # frozen pages skipped via VM */
 	BlockNumber pages_removed;	/* pages remove by truncation */
 	BlockNumber lpdead_item_pages;	/* # pages with LP_DEAD items */
+	BlockNumber missed_dead_pages;	/* # pages with missed dead tuples */
 	BlockNumber nonempty_pages; /* actually, last nonempty page + 1 */
 
 	/* Statistics output by us, for table */
@@ -343,8 +344,8 @@ typedef struct LVRelState
 	/* Counters that follow are only for scanned_pages */
 	int64		tuples_deleted; /* # deleted from table */
 	int64		lpdead_items;	/* # deleted from indexes */
-	int64		new_dead_tuples;	/* new estimated total # of dead items in
-									 * table */
+	int64		recently_dead_tuples;	/* # dead, but not yet removable */
+	int64		missed_dead_tuples;	/* # removable, but not removed */
 	int64		num_tuples;		/* total number of nonremovable tuples */
 	int64		live_tuples;	/* live tuples (reltuples estimate) */
 } LVRelState;
@@ -472,6 +473,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	double		read_rate,
 				write_rate;
 	bool		aggressive;
+	bool		frozenxid_updated,
+				minmulti_updated;
 	BlockNumber orig_rel_pages;
 	char	  **indnames = NULL;
 	TransactionId xidFullScanLimit;
@@ -681,9 +684,11 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	{
 		/* Cannot advance relfrozenxid/relminmxid -- just update pg_class */
 		Assert(!aggressive);
+		frozenxid_updated = minmulti_updated = false;
 		vac_update_relstats(rel, new_rel_pages, new_live_tuples,
 							new_rel_allvisible, vacrel->nindexes > 0,
-							InvalidTransactionId, InvalidMultiXactId, false);
+							InvalidTransactionId, InvalidMultiXactId,
+							NULL, NULL, false);
 	}
 	else
 	{
@@ -692,7 +697,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 			   orig_rel_pages);
 		vac_update_relstats(rel, new_rel_pages, new_live_tuples,
 							new_rel_allvisible, vacrel->nindexes > 0,
-							FreezeLimit, MultiXactCutoff, false);
+							FreezeLimit, MultiXactCutoff,
+							&frozenxid_updated, &minmulti_updated, false);
 	}
 
 	/*
@@ -708,7 +714,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	pgstat_report_vacuum(RelationGetRelid(rel),
 						 rel->rd_rel->relisshared,
 						 Max(new_live_tuples, 0),
-						 vacrel->new_dead_tuples);
+						 vacrel->recently_dead_tuples +
+						 vacrel->missed_dead_tuples);
 	pgstat_progress_end_command();
 
 	/* and log the action if appropriate */
@@ -722,6 +729,7 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 		{
 			StringInfoData buf;
 			char	   *msgfmt;
+			int32		   diff;
 
 			TimestampDifference(starttime, endtime, &secs, &usecs);
 
@@ -768,16 +776,40 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 							 vacrel->relnamespace,
 							 vacrel->relname,
 							 vacrel->num_index_scans);
-			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped frozen\n"),
+			appendStringInfo(&buf, _("pages: %u removed, %u remain, %u skipped using visibility map (%.2f%% of total)\n"),
 							 vacrel->pages_removed,
 							 vacrel->rel_pages,
-							 vacrel->frozenskipped_pages);
+							 orig_rel_pages - vacrel->scanned_pages,
+							 orig_rel_pages == 0 ? 0 :
+							 100.0 * (orig_rel_pages - vacrel->scanned_pages) / orig_rel_pages);
 			appendStringInfo(&buf,
-							 _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable, oldest xmin: %u\n"),
+							 _("tuples: %lld removed, %lld remain, %lld are dead but not yet removable\n"),
 							 (long long) vacrel->tuples_deleted,
 							 (long long) vacrel->new_rel_tuples,
-							 (long long) vacrel->new_dead_tuples,
-							 OldestXmin);
+							 (long long) vacrel->recently_dead_tuples);
+			if (vacrel->missed_dead_tuples > 0)
+				appendStringInfo(&buf,
+								 _("tuples missed: %lld dead from %u contended pages\n"),
+								 (long long) vacrel->missed_dead_tuples,
+								 vacrel->missed_dead_pages);
+			diff = (int32) (ReadNextTransactionId() - OldestXmin);
+			appendStringInfo(&buf,
+							 _("removal cutoff: oldest xmin was %u, which is now %d xact IDs behind\n"),
+							 OldestXmin, diff);
+			if (frozenxid_updated)
+			{
+				diff = (int32) (FreezeLimit - vacrel->relfrozenxid);
+				appendStringInfo(&buf,
+								 _("relfrozenxid: advanced by %d xact IDs, new value: %u\n"),
+								 diff, FreezeLimit);
+			}
+			if (minmulti_updated)
+			{
+				diff = (int32) (MultiXactCutoff - vacrel->relminmxid);
+				appendStringInfo(&buf,
+								 _("relminmxid: advanced by %d multixact IDs, new value: %u\n"),
+								 diff, MultiXactCutoff);
+			}
 			if (orig_rel_pages > 0)
 			{
 				if (vacrel->do_index_vacuuming)
@@ -935,13 +967,15 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params)
 	vacrel->frozenskipped_pages = 0;
 	vacrel->pages_removed = 0;
 	vacrel->lpdead_item_pages = 0;
+	vacrel->missed_dead_pages = 0;
 	vacrel->nonempty_pages = 0;
 
 	/* Initialize instrumentation counters */
 	vacrel->num_index_scans = 0;
 	vacrel->tuples_deleted = 0;
 	vacrel->lpdead_items = 0;
-	vacrel->new_dead_tuples = 0;
+	vacrel->recently_dead_tuples = 0;
+	vacrel->missed_dead_tuples = 0;
 	vacrel->num_tuples = 0;
 	vacrel->live_tuples = 0;
 
@@ -1485,7 +1519,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params)
 	 * (unlikely) scenario that new_live_tuples is -1, take it as zero.
 	 */
 	vacrel->new_rel_tuples =
-		Max(vacrel->new_live_tuples, 0) + vacrel->new_dead_tuples;
+		Max(vacrel->new_live_tuples, 0) + vacrel->recently_dead_tuples +
+		vacrel->missed_dead_tuples;
 
 	/*
 	 * Release any remaining pin on visibility map page.
@@ -1549,7 +1584,8 @@ lazy_scan_heap(LVRelState *vacrel, VacuumParams *params)
 	initStringInfo(&buf);
 	appendStringInfo(&buf,
 					 _("%lld dead row versions cannot be removed yet, oldest xmin: %u\n"),
-					 (long long) vacrel->new_dead_tuples, vacrel->OldestXmin);
+					 (long long) vacrel->recently_dead_tuples,
+					 vacrel->OldestXmin);
 	appendStringInfo(&buf, _("%s."), pg_rusage_show(&ru0));
 
 	ereport(elevel,
@@ -1731,7 +1767,7 @@ lazy_scan_prune(LVRelState *vacrel,
 	HTSV_Result res;
 	int			tuples_deleted,
 				lpdead_items,
-				new_dead_tuples,
+				recently_dead_tuples,
 				num_tuples,
 				live_tuples;
 	int			nnewlpdead;
@@ -1748,7 +1784,7 @@ retry:
 	/* Initialize (or reset) page-level counters */
 	tuples_deleted = 0;
 	lpdead_items = 0;
-	new_dead_tuples = 0;
+	recently_dead_tuples = 0;
 	num_tuples = 0;
 	live_tuples = 0;
 
@@ -1907,11 +1943,11 @@ retry:
 			case HEAPTUPLE_RECENTLY_DEAD:
 
 				/*
-				 * If tuple is recently deleted then we must not remove it
-				 * from relation.  (We only remove items that are LP_DEAD from
+				 * If tuple is recently dead then we must not remove it from
+				 * the relation.  (We only remove items that are LP_DEAD from
 				 * pruning.)
 				 */
-				new_dead_tuples++;
+				recently_dead_tuples++;
 				prunestate->all_visible = false;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
@@ -2087,7 +2123,7 @@ retry:
 	/* Finally, add page-local counts to whole-VACUUM counts */
 	vacrel->tuples_deleted += tuples_deleted;
 	vacrel->lpdead_items += lpdead_items;
-	vacrel->new_dead_tuples += new_dead_tuples;
+	vacrel->recently_dead_tuples += recently_dead_tuples;
 	vacrel->num_tuples += num_tuples;
 	vacrel->live_tuples += live_tuples;
 }
@@ -2141,7 +2177,8 @@ lazy_scan_noprune(LVRelState *vacrel,
 	int			lpdead_items,
 				num_tuples,
 				live_tuples,
-				new_dead_tuples;
+				recently_dead_tuples,
+				missed_dead_tuples;
 	HeapTupleHeader tupleheader;
 	OffsetNumber deadoffsets[MaxHeapTuplesPerPage];
 
@@ -2153,7 +2190,8 @@ lazy_scan_noprune(LVRelState *vacrel,
 	lpdead_items = 0;
 	num_tuples = 0;
 	live_tuples = 0;
-	new_dead_tuples = 0;
+	recently_dead_tuples = 0;
+	missed_dead_tuples = 0;
 
 	maxoff = PageGetMaxOffsetNumber(page);
 	for (offnum = FirstOffsetNumber;
@@ -2222,16 +2260,15 @@ lazy_scan_noprune(LVRelState *vacrel,
 				/*
 				 * There is some useful work for pruning to do, that won't be
 				 * done due to failure to get a cleanup lock.
-				 *
-				 * TODO Add dedicated instrumentation for this case
 				 */
+				missed_dead_tuples++;
 				break;
 			case HEAPTUPLE_RECENTLY_DEAD:
 
 				/*
-				 * Count in new_dead_tuples, just like lazy_scan_prune
+				 * Count in recently_dead_tuples, just like lazy_scan_prune
 				 */
-				new_dead_tuples++;
+				recently_dead_tuples++;
 				break;
 			case HEAPTUPLE_INSERT_IN_PROGRESS:
 
@@ -2286,7 +2323,7 @@ lazy_scan_noprune(LVRelState *vacrel,
 			*hastup = true;
 		*hasfreespace = true;
 		num_tuples += lpdead_items;
-		/* TODO HEAPTUPLE_DEAD style instrumentation needed here, too */
+		missed_dead_tuples += lpdead_items;
 	}
 	else if (lpdead_items > 0)
 	{
@@ -2321,9 +2358,12 @@ lazy_scan_noprune(LVRelState *vacrel,
 	/*
 	 * Finally, add relevant page-local counts to whole-VACUUM counts
 	 */
-	vacrel->new_dead_tuples += new_dead_tuples;
+	vacrel->recently_dead_tuples += recently_dead_tuples;
+	vacrel->missed_dead_tuples += missed_dead_tuples;
 	vacrel->num_tuples += num_tuples;
 	vacrel->live_tuples += live_tuples;
+	if (missed_dead_tuples > 0)
+		vacrel->missed_dead_pages++;
 
 	/* Caller won't need to call lazy_scan_prune with same page */
 	return true;
@@ -2397,8 +2437,8 @@ lazy_vacuum(LVRelState *vacrel)
 		 * dead_items space is not CPU cache resident.
 		 *
 		 * We don't take any special steps to remember the LP_DEAD items (such
-		 * as counting them in new_dead_tuples report to the stats collector)
-		 * when the optimization is applied.  Though the accounting used in
+		 * as counting them in our final report to the stats collector) when
+		 * the optimization is applied.  Though the accounting used in
 		 * analyze.c's acquire_sample_rows() will recognize the same LP_DEAD
 		 * items as dead rows in its own stats collector report, that's okay.
 		 * The discrepancy should be negligible.  If this optimization is ever
@@ -4058,7 +4098,7 @@ update_index_statistics(LVRelState *vacrel)
 							false,
 							InvalidTransactionId,
 							InvalidMultiXactId,
-							false);
+							NULL, NULL, false);
 	}
 }
 
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index cd77907fc..afd1cb8f5 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -651,6 +651,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							hasindex,
 							InvalidTransactionId,
 							InvalidMultiXactId,
+							NULL, NULL,
 							in_outer_xact);
 
 		/* Same for indexes */
@@ -667,6 +668,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 								false,
 								InvalidTransactionId,
 								InvalidMultiXactId,
+								NULL, NULL,
 								in_outer_xact);
 		}
 	}
@@ -679,6 +681,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		vac_update_relstats(onerel, -1, totalrows,
 							0, hasindex, InvalidTransactionId,
 							InvalidMultiXactId,
+							NULL, NULL,
 							in_outer_xact);
 	}
 
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index 5c4bc15b4..8bd4bd12c 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1308,6 +1308,7 @@ vac_update_relstats(Relation relation,
 					BlockNumber num_all_visible_pages,
 					bool hasindex, TransactionId frozenxid,
 					MultiXactId minmulti,
+					bool *frozenxid_updated, bool *minmulti_updated,
 					bool in_outer_xact)
 {
 	Oid			relid = RelationGetRelid(relation);
@@ -1383,22 +1384,30 @@ vac_update_relstats(Relation relation,
 	 * This should match vac_update_datfrozenxid() concerning what we consider
 	 * to be "in the future".
 	 */
+	if (frozenxid_updated)
+		*frozenxid_updated = false;
 	if (TransactionIdIsNormal(frozenxid) &&
 		pgcform->relfrozenxid != frozenxid &&
 		(TransactionIdPrecedes(pgcform->relfrozenxid, frozenxid) ||
 		 TransactionIdPrecedes(ReadNextTransactionId(),
 							   pgcform->relfrozenxid)))
 	{
+		if (frozenxid_updated)
+			*frozenxid_updated = true;
 		pgcform->relfrozenxid = frozenxid;
 		dirty = true;
 	}
 
 	/* Similarly for relminmxid */
+	if (minmulti_updated)
+		*minmulti_updated = false;
 	if (MultiXactIdIsValid(minmulti) &&
 		pgcform->relminmxid != minmulti &&
 		(MultiXactIdPrecedes(pgcform->relminmxid, minmulti) ||
 		 MultiXactIdPrecedes(ReadNextMultiXactId(), pgcform->relminmxid)))
 	{
+		if (minmulti_updated)
+			*minmulti_updated = true;
 		pgcform->relminmxid = minmulti;
 		dirty = true;
 	}
-- 
2.30.2

