From c6537417b20a7db114081ea5e76ba87b6f443903 Mon Sep 17 00:00:00 2001
From: TatsuyaKawata <kawatatatsuya0913@gmail.com>
Date: Mon, 19 Jan 2026 23:20:23 +0900
Subject: [PATCH v4] Add sampling statistics to autoanalyze log output

Previously, autoanalyze log messages only showed buffer usage, WAL usage,
and system usage statistics. However, ANALYZE VERBOSE showed additional
sampling statistics including pages scanned, live rows, and dead rows
found during sampling. This made it difficult to understand the sampling
behavior from autoanalyze logs alone.

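This patch unifies the logging by adding sampling statistics to the
autoanalyze log output. The new log format includes:
- Number of pages scanned out of total pages
- Live rows and dead rows found during sampling
- Number of rows in sample and estimated total rows

When an inheritance tree or partitioned table is analyzed, one such line
is logged for each sampled non-foreign child table. For a foreign table,
only the number of rows in the sample and the estimated total rows are
logged, since the sampling itself is done by the FDW.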

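To support this change, a new AnalyzeSamplingStats struct is introduced in
vacuum.h to collect and pass sampling statistics. acquire_sample_rows()
now fills in this struct instead of emitting its own message, and the
equivalent per-relation messages in file_fdw and postgres_fdw are removed
because the same information is reported in the unified log output.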

Author: Tatsuya Kawata <kawatatatsuya0913@gmail.com>
Reviewed-by: Fujii Masao <masao.fujii@gmail.com>
Reviewed-by: Sami Imseih <samimseih@gmail.com>
Reviewed-by: Chao Li <li.evan.chao@gmail.com>
Discussion: https://www.postgresql.org/message-id/flat/CAHza6qcN%3DPaGqo8CGgrqd%2BnaOwY_pLGiwEq6u%3D%2BASZZNL9zi9A%40mail.gmail.com#26a70a815cc922b7513e71fc0c445ff3
---
 contrib/file_fdw/file_fdw.c         |   9 --
 contrib/postgres_fdw/postgres_fdw.c |   8 --
 src/backend/commands/analyze.c      | 124 ++++++++++++++++++++++------
 src/include/commands/vacuum.h       |  16 ++++
 src/tools/pgindent/typedefs.list    |   1 +
 5 files changed, 116 insertions(+), 42 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 33a37d832ce..5db29cc33e2 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -1327,14 +1327,5 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 	pfree(values);
 	pfree(nulls);
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": file contains %.0f rows; "
-					"%d rows in sample",
-					RelationGetRelationName(onerel),
-					*totalrows, numrows)));
-
 	return numrows;
 }
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index 3572689e33b..736502b9224 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -5286,14 +5286,6 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 	else
 		*totalrows = reltuples;
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": table contains %.0f rows, %d rows in sample",
-					RelationGetRelationName(relation),
-					*totalrows, astate.numrows)));
-
 	return astate.numrows;
 }
 
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index a483424152c..a1f95ba6cad 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -86,11 +86,13 @@ static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
 static int	acquire_sample_rows(Relation onerel, int elevel,
 								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
+								double *totalrows, double *totaldeadrows,
+								AnalyzeSamplingStats *sampling_stats);
 static int	compare_rows(const void *a, const void *b, void *arg);
 static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
 										  HeapTuple *rows, int targrows,
-										  double *totalrows, double *totaldeadrows);
+										  double *totalrows, double *totaldeadrows,
+										  List **sampling_stats_list);
 static void update_attstats(Oid relid, bool inh,
 							int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -187,9 +189,7 @@ analyze_rel(Oid relid, RangeVar *relation,
 	if (onerel->rd_rel->relkind == RELKIND_RELATION ||
 		onerel->rd_rel->relkind == RELKIND_MATVIEW)
 	{
-		/* Regular table, so we'll use the regular row acquisition function */
-		acquirefunc = acquire_sample_rows;
-		/* Also get regular table's size */
+		/* Get regular table's size */
 		relpages = RelationGetNumberOfBlocks(onerel);
 	}
 	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
@@ -302,6 +302,8 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	double		totalrows,
 				totaldeadrows;
 	HeapTuple  *rows;
+	AnalyzeSamplingStats sampling_stats = {0};
+	List	   *sampling_stats_list = NIL;
 	PGRUsage	ru0;
 	TimestampTz starttime = 0;
 	MemoryContext caller_context;
@@ -535,11 +537,17 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 	if (inh)
 		numrows = acquire_inherited_sample_rows(onerel, elevel,
 												rows, targrows,
-												&totalrows, &totaldeadrows);
-	else
+												&totalrows, &totaldeadrows,
+												&sampling_stats_list);
+	else if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
 		numrows = (*acquirefunc) (onerel, elevel,
 								  rows, targrows,
 								  &totalrows, &totaldeadrows);
+	else
+		numrows = acquire_sample_rows(onerel, elevel,
+									  rows, targrows,
+									  &totalrows, &totaldeadrows,
+									  &sampling_stats);
 
 	/*
 	 * Compute the statistics.  Temporary results during the calculations for
@@ -805,7 +813,12 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 			initStringInfo(&buf);
 
 			if (AmAutoVacuumWorkerProcess())
-				msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n");
+			{
+				if (inh)
+					msgfmt = _("automatic analyze of table \"%s.%s.%s\" inheritance tree\n");
+				else
+					msgfmt = _("automatic analyze of table \"%s.%s.%s\"\n");
+			}
 			else
 				msgfmt = _("finished analyzing table \"%s.%s.%s\"\n");
 
@@ -813,6 +826,49 @@ do_analyze_rel(Relation onerel, const VacuumParams params,
 							 get_database_name(MyDatabaseId),
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel));
+
+			/*
+			 * Report sampling statistics based on the table type.
+			 *
+			 * For foreign tables, we can only report the number of rows
+			 * sampled and estimated total, since sampling is done by the FDW.
+			 *
+			 * For inheritance trees and partitioned tables, we report the
+			 * statistics of each non-foreign child that was sampled.
+			 *
+			 * For regular tables, we report the standard sampling statistics.
+			 */
+			if (onerel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+				appendStringInfo(&buf,
+								 _("sampling: target contains %.0f rows; %d rows in sample\n"),
+								 totalrows, numrows);
+			else if (inh && sampling_stats_list != NIL)
+			{
+				ListCell   *lc;
+
+				foreach(lc, sampling_stats_list)
+				{
+					AnalyzeSamplingStats *stats = (AnalyzeSamplingStats *) lfirst(lc);
+
+					appendStringInfo(&buf,
+									 _("sampling \"%s.%s\": scanned %u of %u pages, "
+									   "containing %.0f live rows and %.0f dead rows; "
+									   "%d rows in sample, %.0f estimated total rows\n"),
+									 get_namespace_name(get_rel_namespace(stats->relid)),
+									 get_rel_name(stats->relid),
+									 stats->scannedpages, stats->totalpages,
+									 stats->liverows, stats->deadrows,
+									 stats->samplerows, stats->totalrows);
+				}
+			}
+			else
+				appendStringInfo(&buf,
+								 _("sampling: scanned %u of %u pages, "
+								   "containing %.0f live rows and %.0f dead rows; "
+								   "%d rows in sample, %.0f estimated total rows\n"),
+								 sampling_stats.scannedpages, sampling_stats.totalpages,
+								 sampling_stats.liverows, sampling_stats.deadrows,
+								 sampling_stats.samplerows, sampling_stats.totalrows);
 			if (track_cost_delay_timing)
 			{
 				/*
@@ -1204,7 +1260,8 @@ block_sampling_read_stream_next(ReadStream *stream,
 static int
 acquire_sample_rows(Relation onerel, int elevel,
 					HeapTuple *rows, int targrows,
-					double *totalrows, double *totaldeadrows)
+					double *totalrows, double *totaldeadrows,
+					AnalyzeSamplingStats *sampling_stats)
 {
 	int			numrows = 0;	/* # rows now in reservoir */
 	double		samplerows = 0; /* total # rows collected */
@@ -1345,17 +1402,13 @@ acquire_sample_rows(Relation onerel, int elevel,
 		*totaldeadrows = 0.0;
 	}
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": scanned %d of %u pages, "
-					"containing %.0f live rows and %.0f dead rows; "
-					"%d rows in sample, %.0f estimated total rows",
-					RelationGetRelationName(onerel),
-					bs.m, totalblocks,
-					liverows, deadrows,
-					numrows, *totalrows)));
+	/* Populate sampling statistics output parameters */
+	sampling_stats->totalpages = totalblocks;
+	sampling_stats->scannedpages = bs.m;
+	sampling_stats->liverows = liverows;
+	sampling_stats->deadrows = deadrows;
+	sampling_stats->samplerows = numrows;
+	sampling_stats->totalrows = *totalrows;
 
 	return numrows;
 }
@@ -1396,7 +1449,8 @@ compare_rows(const void *a, const void *b, void *arg)
 static int
 acquire_inherited_sample_rows(Relation onerel, int elevel,
 							  HeapTuple *rows, int targrows,
-							  double *totalrows, double *totaldeadrows)
+							  double *totalrows, double *totaldeadrows,
+							  List **sampling_stats_list)
 {
 	List	   *tableOIDs;
 	Relation   *rels;
@@ -1412,6 +1466,7 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 	/* Initialize output parameters to zero now, in case we exit early */
 	*totalrows = 0;
 	*totaldeadrows = 0;
+	*sampling_stats_list = NIL;
 
 	/*
 	 * Find all members of inheritance set.  We only need AccessShareLock on
@@ -1474,7 +1529,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 			childrel->rd_rel->relkind == RELKIND_MATVIEW)
 		{
-			/* Regular table, so use the regular row acquisition function */
-			acquirefunc = acquire_sample_rows;
+			/* Regular table, so get its size */
 			relpages = RelationGetNumberOfBlocks(childrel);
 		}
 		else if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
@@ -1586,9 +1640,29 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 							tdrows;
 
 				/* Fetch a random sample of the child's rows */
-				childrows = (*acquirefunc) (childrel, elevel,
-											rows + numrows, childtargrows,
-											&trows, &tdrows);
+				if (childrel->rd_rel->relkind == RELKIND_FOREIGN_TABLE)
+				{
+					childrows = (*acquirefunc) (childrel, elevel,
+												rows + numrows, childtargrows,
+												&trows, &tdrows);
+				}
+				else
+				{
+					AnalyzeSamplingStats *child_sampling_stats;
+
+					child_sampling_stats = (AnalyzeSamplingStats *)
+						palloc(sizeof(AnalyzeSamplingStats));
+
+					childrows = acquire_sample_rows(childrel, elevel,
+													rows + numrows, childtargrows,
+													&trows, &tdrows,
+													child_sampling_stats);
+
+					/* Set the relation OID and add to the list */
+					child_sampling_stats->relid = RelationGetRelid(childrel);
+					*sampling_stats_list = lappend(*sampling_stats_list,
+												   child_sampling_stats);
+				}
 
 				/* We may need to convert from child's rowtype to parent's */
 				if (childrows > 0 &&
diff --git a/src/include/commands/vacuum.h b/src/include/commands/vacuum.h
index e885a4b9c77..979b8088edb 100644
--- a/src/include/commands/vacuum.h
+++ b/src/include/commands/vacuum.h
@@ -300,6 +300,22 @@ typedef struct VacDeadItemsInfo
 	int64		num_items;		/* current # of entries */
 } VacDeadItemsInfo;
 
+/*
+ * AnalyzeSamplingStats holds the sampling statistics gathered for a single
+ * relation during ANALYZE, so that they can be reported in the log output of
+ * both manual ANALYZE VERBOSE and autoanalyze.
+ */
+typedef struct AnalyzeSamplingStats
+{
+	Oid			relid;			/* relation OID (inheritance children only) */
+	BlockNumber totalpages;		/* total pages in relation */
+	BlockNumber scannedpages;	/* pages actually scanned */
+	double		liverows;		/* live rows found during sampling */
+	double		deadrows;		/* dead rows found during sampling */
+	int			samplerows;		/* number of rows in sample */
+	double		totalrows;		/* estimated total rows */
+} AnalyzeSamplingStats;
+
 /* GUC parameters */
 extern PGDLLIMPORT int default_statistics_target;	/* PGDLLIMPORT for PostGIS */
 extern PGDLLIMPORT int vacuum_freeze_min_age;
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 3f3a888fd0e..6f3e875c59e 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -120,6 +120,7 @@ AmcheckOptions
 AnalyzeAttrComputeStatsFunc
 AnalyzeAttrFetchFunc
 AnalyzeForeignTable_function
+AnalyzeSamplingStats
 AnlExprData
 AnlIndexData
 AnyArrayType
-- 
2.34.1

