From 2ed8278b4fb44050c4081b021bf59bcc33fd107e Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Thu, 16 Jan 2025 16:19:57 -0500
Subject: [PATCH v4 1/2] Add relallfrozen to pg_class

Add relallfrozen, an estimate of the number of pages marked all-frozen
in the visibility map.

pg_class already has relallvisible, an estimate of the number of pages
in the relation marked all-visible in the visibility map. This is used
primarily for planning.

relallfrozen, however, is useful for estimating the outstanding number
of all-visible but not all-frozen pages in the relation for the purposes
of scheduling manual VACUUMs and tuning vacuum freeze parameters.

In the future, this field could be used to trigger more frequent vacuums
on insert-focused workloads with significant volume of frozen data.

Reviewed-by: Greg Sabino Mullane
---
 doc/src/sgml/catalogs.sgml              | 20 ++++++++++++++++++++
 src/backend/access/heap/vacuumlazy.c    | 17 +++++++++++++----
 src/backend/catalog/heap.c              |  2 ++
 src/backend/catalog/index.c             | 22 +++++++++++++++++-----
 src/backend/commands/analyze.c          | 12 ++++++------
 src/backend/commands/cluster.c          |  5 +++++
 src/backend/commands/vacuum.c           | 15 ++++++++++++++-
 src/backend/statistics/relation_stats.c | 23 ++++++++++++++++++++---
 src/backend/utils/cache/relcache.c      |  2 ++
 src/include/catalog/pg_class.h          |  8 ++++++++
 src/include/commands/vacuum.h           |  1 +
 11 files changed, 108 insertions(+), 19 deletions(-)

diff --git a/doc/src/sgml/catalogs.sgml b/doc/src/sgml/catalogs.sgml
index d3036c5ba9d..d218d3702e9 100644
--- a/doc/src/sgml/catalogs.sgml
+++ b/doc/src/sgml/catalogs.sgml
@@ -2064,6 +2064,26 @@ SCRAM-SHA-256$<replaceable>&lt;iteration count&gt;</replaceable>:<replaceable>&l
       </para></entry>
      </row>
 
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>relallfrozen</structfield> <type>int4</type>
+      </para>
+      <para>
+       Number of pages that are marked all-frozen in the table's visibility
+       map.  This is only an estimate used for triggering autovacuums. It is
+       updated by <link linkend="sql-vacuum"><command>VACUUM</command></link>,
+       <link linkend="sql-analyze"><command>ANALYZE</command></link>,
+       and a few DDL commands such as
+       <link linkend="sql-createindex"><command>CREATE INDEX</command></link>.
+       <structfield>relallfrozen</structfield> must be less than or equal to
+       <strutfield>relallvisible</structfield> as an all-frozen page must be
+       all-visible. If <structfield>relallvisible</structfield> was updated
+       manually, <structfield>relallfrozen</structfield> will be -1 until the
+       next time they are updated.
+      </para></entry>
+     </row>
+
+
      <row>
       <entry role="catalog_table_entry"><para role="column_definition">
        <structfield>reltoastrelid</structfield> <type>oid</type>
diff --git a/src/backend/access/heap/vacuumlazy.c b/src/backend/access/heap/vacuumlazy.c
index 5b0e790e121..ec73471c2c4 100644
--- a/src/backend/access/heap/vacuumlazy.c
+++ b/src/backend/access/heap/vacuumlazy.c
@@ -370,7 +370,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 				minmulti_updated;
 	BlockNumber orig_rel_pages,
 				new_rel_pages,
-				new_rel_allvisible;
+				new_rel_allvisible,
+				new_rel_allfrozen;
 	PGRUsage	ru0;
 	TimestampTz starttime = 0;
 	PgStat_Counter startreadtime = 0,
@@ -629,10 +630,17 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * pg_class.relpages to
 	 */
 	new_rel_pages = vacrel->rel_pages;	/* After possible rel truncation */
-	visibilitymap_count(rel, &new_rel_allvisible, NULL);
+	visibilitymap_count(rel, &new_rel_allvisible, &new_rel_allfrozen);
 	if (new_rel_allvisible > new_rel_pages)
 		new_rel_allvisible = new_rel_pages;
 
+	/*
+	 * An all-frozen block _must_ be all-visible. As such, clamp the count of
+	 * all-frozen blocks to the count of all-visible blocks.
+	 */
+	if (new_rel_allfrozen > new_rel_allvisible)
+		new_rel_allfrozen = new_rel_allvisible;
+
 	/*
 	 * Now actually update rel's pg_class entry.
 	 *
@@ -641,7 +649,8 @@ heap_vacuum_rel(Relation rel, VacuumParams *params,
 	 * scan every page that isn't skipped using the visibility map.
 	 */
 	vac_update_relstats(rel, new_rel_pages, vacrel->new_live_tuples,
-						new_rel_allvisible, vacrel->nindexes > 0,
+						new_rel_allvisible, new_rel_allfrozen,
+						vacrel->nindexes > 0,
 						vacrel->NewRelfrozenXid, vacrel->NewRelminMxid,
 						&frozenxid_updated, &minmulti_updated, false);
 
@@ -3248,7 +3257,7 @@ update_relstats_all_indexes(LVRelState *vacrel)
 		vac_update_relstats(indrel,
 							istat->num_pages,
 							istat->num_index_tuples,
-							0,
+							0, 0,
 							false,
 							InvalidTransactionId,
 							InvalidMultiXactId,
diff --git a/src/backend/catalog/heap.c b/src/backend/catalog/heap.c
index 57ef466acce..5ab75d5c977 100644
--- a/src/backend/catalog/heap.c
+++ b/src/backend/catalog/heap.c
@@ -913,6 +913,7 @@ InsertPgClassTuple(Relation pg_class_desc,
 	values[Anum_pg_class_relpages - 1] = Int32GetDatum(rd_rel->relpages);
 	values[Anum_pg_class_reltuples - 1] = Float4GetDatum(rd_rel->reltuples);
 	values[Anum_pg_class_relallvisible - 1] = Int32GetDatum(rd_rel->relallvisible);
+	values[Anum_pg_class_relallfrozen - 1] = Int32GetDatum(rd_rel->relallfrozen);
 	values[Anum_pg_class_reltoastrelid - 1] = ObjectIdGetDatum(rd_rel->reltoastrelid);
 	values[Anum_pg_class_relhasindex - 1] = BoolGetDatum(rd_rel->relhasindex);
 	values[Anum_pg_class_relisshared - 1] = BoolGetDatum(rd_rel->relisshared);
@@ -983,6 +984,7 @@ AddNewRelationTuple(Relation pg_class_desc,
 	new_rel_reltup->relpages = 0;
 	new_rel_reltup->reltuples = -1;
 	new_rel_reltup->relallvisible = 0;
+	new_rel_reltup->relallfrozen = 0;
 
 	/* Sequences always have a known size */
 	if (relkind == RELKIND_SEQUENCE)
diff --git a/src/backend/catalog/index.c b/src/backend/catalog/index.c
index 7377912b41e..ee72a1e5e41 100644
--- a/src/backend/catalog/index.c
+++ b/src/backend/catalog/index.c
@@ -2792,8 +2792,8 @@ FormIndexDatum(IndexInfo *indexInfo,
  * hasindex: set relhasindex to this value
  * reltuples: if >= 0, set reltuples to this value; else no change
  *
- * If reltuples >= 0, relpages and relallvisible are also updated (using
- * RelationGetNumberOfBlocks() and visibilitymap_count()).
+ * If reltuples >= 0, relpages, relallvisible, and relallfrozen are also
+ * updated (using RelationGetNumberOfBlocks() and visibilitymap_count()).
  *
  * NOTE: an important side-effect of this operation is that an SI invalidation
  * message is sent out to all backends --- including me --- causing relcache
@@ -2811,6 +2811,7 @@ index_update_stats(Relation rel,
 	bool		update_stats;
 	BlockNumber relpages = 0;	/* keep compiler quiet */
 	BlockNumber relallvisible = 0;
+	BlockNumber relallfrozen = 0;
 	Oid			relid = RelationGetRelid(rel);
 	Relation	pg_class;
 	ScanKeyData key[1];
@@ -2850,7 +2851,13 @@ index_update_stats(Relation rel,
 		relpages = RelationGetNumberOfBlocks(rel);
 
 		if (rel->rd_rel->relkind != RELKIND_INDEX)
-			visibilitymap_count(rel, &relallvisible, NULL);
+		{
+			visibilitymap_count(rel, &relallvisible, &relallfrozen);
+
+			/* An all-frozen block must be all-visible in the VM */
+			if (relallfrozen > relallvisible)
+				relallfrozen = relallvisible;
+		}
 	}
 
 	/*
@@ -2875,8 +2882,8 @@ index_update_stats(Relation rel,
 	 * transaction could still fail before committing.  Setting relhasindex
 	 * true is safe even if there are no indexes (VACUUM will eventually fix
 	 * it).  And of course the new relpages and reltuples counts are correct
-	 * regardless.  However, we don't want to change relpages (or
-	 * relallvisible) if the caller isn't providing an updated reltuples
+	 * regardless. However, we don't want to change relpages (or relallvisible
+	 * and relallfrozen) if the caller isn't providing an updated reltuples
 	 * count, because that would bollix the reltuples/relpages ratio which is
 	 * what's really important.
 	 */
@@ -2923,6 +2930,11 @@ index_update_stats(Relation rel,
 			rd_rel->relallvisible = (int32) relallvisible;
 			dirty = true;
 		}
+		if (rd_rel->relallfrozen != (int32) relallfrozen)
+		{
+			rd_rel->relallfrozen = (int32) relallfrozen;
+			dirty = true;
+		}
 	}
 
 	/*
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 2a7769b1fd1..927b35ead34 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -628,12 +628,11 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	 */
 	if (!inh)
 	{
-		BlockNumber relallvisible;
+		BlockNumber relallvisible = 0;
+		BlockNumber relallfrozen = 0;
 
 		if (RELKIND_HAS_STORAGE(onerel->rd_rel->relkind))
-			visibilitymap_count(onerel, &relallvisible, NULL);
-		else
-			relallvisible = 0;
+			visibilitymap_count(onerel, &relallvisible, &relallfrozen);
 
 		/*
 		 * Update pg_class for table relation.  CCI first, in case acquirefunc
@@ -644,6 +643,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							relpages,
 							totalrows,
 							relallvisible,
+							relallfrozen,
 							hasindex,
 							InvalidTransactionId,
 							InvalidMultiXactId,
@@ -660,7 +660,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 			vac_update_relstats(Irel[ind],
 								RelationGetNumberOfBlocks(Irel[ind]),
 								totalindexrows,
-								0,
+								0, 0,
 								false,
 								InvalidTransactionId,
 								InvalidMultiXactId,
@@ -676,7 +676,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		 */
 		CommandCounterIncrement();
 		vac_update_relstats(onerel, -1, totalrows,
-							0, hasindex, InvalidTransactionId,
+							0, 0, hasindex, InvalidTransactionId,
 							InvalidMultiXactId,
 							NULL, NULL,
 							in_outer_xact);
diff --git a/src/backend/commands/cluster.c b/src/backend/commands/cluster.c
index 99193f5c886..54a08e4102e 100644
--- a/src/backend/commands/cluster.c
+++ b/src/backend/commands/cluster.c
@@ -1226,6 +1226,7 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
 		int32		swap_pages;
 		float4		swap_tuples;
 		int32		swap_allvisible;
+		int32		swap_allfrozen;
 
 		swap_pages = relform1->relpages;
 		relform1->relpages = relform2->relpages;
@@ -1238,6 +1239,10 @@ swap_relation_files(Oid r1, Oid r2, bool target_is_pg_class,
 		swap_allvisible = relform1->relallvisible;
 		relform1->relallvisible = relform2->relallvisible;
 		relform2->relallvisible = swap_allvisible;
+
+		swap_allfrozen = relform1->relallfrozen;
+		relform1->relallfrozen = relform2->relallfrozen;
+		relform2->relallfrozen = swap_allfrozen;
 	}
 
 	/*
diff --git a/src/backend/commands/vacuum.c b/src/backend/commands/vacuum.c
index e6745e6145c..376ce2e489a 100644
--- a/src/backend/commands/vacuum.c
+++ b/src/backend/commands/vacuum.c
@@ -1410,6 +1410,7 @@ void
 vac_update_relstats(Relation relation,
 					BlockNumber num_pages, double num_tuples,
 					BlockNumber num_all_visible_pages,
+					BlockNumber num_all_frozen_pages,
 					bool hasindex, TransactionId frozenxid,
 					MultiXactId minmulti,
 					bool *frozenxid_updated, bool *minmulti_updated,
@@ -1441,8 +1442,15 @@ vac_update_relstats(Relation relation,
 			 relid);
 	pgcform = (Form_pg_class) GETSTRUCT(ctup);
 
-	/* Apply statistical updates, if any, to copied tuple */
+	/*
+	 * An all-frozen block must be marked all-visible in the VM. While callers
+	 * are likely to check this themselves, it is worth ensuring we don't put
+	 * incorrect information in pg_class.
+	 */
+	if (num_all_frozen_pages > num_all_visible_pages)
+		num_all_frozen_pages = num_all_visible_pages;
 
+	/* Apply statistical updates, if any, to copied tuple */
 	dirty = false;
 	if (pgcform->relpages != (int32) num_pages)
 	{
@@ -1459,6 +1467,11 @@ vac_update_relstats(Relation relation,
 		pgcform->relallvisible = (int32) num_all_visible_pages;
 		dirty = true;
 	}
+	if (pgcform->relallfrozen != (int32) num_all_frozen_pages)
+	{
+		pgcform->relallfrozen = (int32) num_all_frozen_pages;
+		dirty = true;
+	}
 
 	/* Apply DDL updates, but not inside an outer transaction (see above) */
 
diff --git a/src/backend/statistics/relation_stats.c b/src/backend/statistics/relation_stats.c
index 046661d7c3f..758a56f8a46 100644
--- a/src/backend/statistics/relation_stats.c
+++ b/src/backend/statistics/relation_stats.c
@@ -56,6 +56,10 @@ static bool relation_statistics_update(FunctionCallInfo fcinfo, int elevel,
 
 /*
  * Internal function for modifying statistics for a relation.
+ *
+ * Up to four pg_class columns may be updated even though only three relation
+ * statistics may be modified; relallfrozen is always set to -1 when
+ * relallvisible is updated manually.
  */
 static bool
 relation_statistics_update(FunctionCallInfo fcinfo, int elevel, bool inplace)
@@ -167,6 +171,14 @@ relation_statistics_update(FunctionCallInfo fcinfo, int elevel, bool inplace)
 		if (update_relallvisible && pgcform->relallvisible != relallvisible)
 		{
 			pgcform->relallvisible = relallvisible;
+
+			/*
+			 * If we are modifying relallvisible manually, it is not clear
+			 * what relallfrozen value would make sense. Therefore, set it to
+			 * -1 ("unknown"). It will be updated the next time these fields
+			 * are updated.
+			 */
+			pgcform->relallfrozen = -1;
 			dirty = true;
 		}
 
@@ -182,9 +194,9 @@ relation_statistics_update(FunctionCallInfo fcinfo, int elevel, bool inplace)
 		TupleDesc	tupdesc = RelationGetDescr(crel);
 		HeapTuple	ctup;
 		Form_pg_class pgcform;
-		int			replaces[3] = {0};
-		Datum		values[3] = {0};
-		bool		nulls[3] = {0};
+		int			replaces[4] = {0};
+		Datum		values[4] = {0};
+		bool		nulls[4] = {0};
 		int			nreplaces = 0;
 
 		ctup = SearchSysCache1(RELOID, ObjectIdGetDatum(reloid));
@@ -217,6 +229,11 @@ relation_statistics_update(FunctionCallInfo fcinfo, int elevel, bool inplace)
 			replaces[nreplaces] = Anum_pg_class_relallvisible;
 			values[nreplaces] = Int32GetDatum(relallvisible);
 			nreplaces++;
+
+			/* See comment above in-place update of relallfrozen */
+			replaces[nreplaces] = Anum_pg_class_relallfrozen;
+			values[nreplaces] = Int32GetDatum(-1);
+			nreplaces++;
 		}
 
 		if (nreplaces > 0)
diff --git a/src/backend/utils/cache/relcache.c b/src/backend/utils/cache/relcache.c
index 43219a9629c..efd6bcb3860 100644
--- a/src/backend/utils/cache/relcache.c
+++ b/src/backend/utils/cache/relcache.c
@@ -1925,6 +1925,7 @@ formrdesc(const char *relationName, Oid relationReltype,
 	relation->rd_rel->relpages = 0;
 	relation->rd_rel->reltuples = -1;
 	relation->rd_rel->relallvisible = 0;
+	relation->rd_rel->relallfrozen = 0;
 	relation->rd_rel->relkind = RELKIND_RELATION;
 	relation->rd_rel->relnatts = (int16) natts;
 
@@ -3882,6 +3883,7 @@ RelationSetNewRelfilenumber(Relation relation, char persistence)
 			classform->relpages = 0;	/* it's empty until further notice */
 			classform->reltuples = -1;
 			classform->relallvisible = 0;
+			classform->relallfrozen = 0;
 		}
 		classform->relfrozenxid = freezeXid;
 		classform->relminmxid = minmulti;
diff --git a/src/include/catalog/pg_class.h b/src/include/catalog/pg_class.h
index f0d612ca487..fbf42b0e195 100644
--- a/src/include/catalog/pg_class.h
+++ b/src/include/catalog/pg_class.h
@@ -68,6 +68,14 @@ CATALOG(pg_class,1259,RelationRelationId) BKI_BOOTSTRAP BKI_ROWTYPE_OID(83,Relat
 	/* # of all-visible blocks (not always up-to-date) */
 	int32		relallvisible BKI_DEFAULT(0);
 
+	/*
+	 * # of all-frozen blocks (not always up-to-date)
+	 * Starts as 0 (if it has never been updated).
+	 * If relallvisible is manually updated, relallfrozen will be
+	 * -1, meaning "unknown"
+	 */
+	int32		relallfrozen BKI_DEFAULT(0);
+
 	/* OID of toast table; 0 if none */
 	Oid			reltoastrelid BKI_DEFAULT(0) BKI_LOOKUP_OPT(pg_class);
 
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index 12d0b61950d..b2a678df09e 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -329,6 +329,7 @@ extern void vac_update_relstats(Relation relation,
 								BlockNumber num_pages,
 								double num_tuples,
 								BlockNumber num_all_visible_pages,
+								BlockNumber num_all_frozen_pages,
 								bool hasindex,
 								TransactionId frozenxid,
 								MultiXactId minmulti,
-- 
2.45.2

