From 25211c6b6c4eebdcdea92a956b8cfbe928fa94f8 Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Wed, 24 Jul 2024 10:15:35 +0200
Subject: Output pages and tuples stats in ANALYZE VERBOSE

Pages and tuples stats were only displayed as a log within
acquire_sample function and ANALYZE VERBOSE couldn't display those as
they were not available, only the total live and dead tuples were
exported.

To make the behaviour more consistent with VACUUM VERBOSE and between
autoanalyze logs and ANALYZE VERBOSE:
- relevant sampling stats are in a AcquireSampleStats
- log is removed from acquire sample functions.
- do_analyze_rel now outputs scanned and tuples statistics when
  relevant. sampling from fdw doesn't provide those statistics so they
  are not displayed in those cases.

The sampling functions of both file_fdw and postgres_fdw were updated to reflect
those changes.
---
 contrib/file_fdw/file_fdw.c         |  36 +++------
 contrib/postgres_fdw/postgres_fdw.c |  29 ++-----
 src/backend/commands/analyze.c      | 113 ++++++++++++++--------------
 src/include/foreign/fdwapi.h        |  18 ++++-
 4 files changed, 89 insertions(+), 107 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 249d82d3a0..2a4b0d8a3b 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -160,9 +160,8 @@ static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 						   FileFdwPlanState *fdw_private,
 						   Cost *startup_cost, Cost *total_cost);
-static int	file_acquire_sample_rows(Relation onerel, int elevel,
-									 HeapTuple *rows, int targrows,
-									 double *totalrows, double *totaldeadrows);
+static int	file_acquire_sample_rows(Relation onerel, HeapTuple *rows,
+									 int targrows, AcquireSampleStats * sstats);
 
 
 /*
@@ -1112,7 +1111,7 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  * which must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
  * We also count the total number of rows in the file and return it into
- * *totalrows.  Note that *totaldeadrows is always set to 0.
+ * sstats->total_live_tuples.
  *
  * Note that the returned list of rows is not always in order by physical
  * position in the file.  Therefore, correlation estimates derived later
@@ -1120,11 +1119,9 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  * currently (the planner only pays attention to correlation for indexscans).
  */
 static int
-file_acquire_sample_rows(Relation onerel, int elevel,
-						 HeapTuple *rows, int targrows,
-						 double *totalrows, double *totaldeadrows)
+file_acquire_sample_rows(Relation onerel, HeapTuple *rows,
+						 int targrows, AcquireSampleStats * sstats)
 {
-	int			numrows = 0;
 	double		rowstoskip = -1;	/* -1 means not set yet */
 	ReservoirStateData rstate;
 	TupleDesc	tupDesc;
@@ -1141,6 +1138,8 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 
 	Assert(onerel);
 	Assert(targrows > 0);
+	Assert(sstats->sampled_tuples == 0);
+	Assert(sstats->total_live_tuples == 0);
 
 	tupDesc = RelationGetDescr(onerel);
 	values = (Datum *) palloc(tupDesc->natts * sizeof(Datum));
@@ -1172,8 +1171,6 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 	errcallback.previous = error_context_stack;
 	error_context_stack = &errcallback;
 
-	*totalrows = 0;
-	*totaldeadrows = 0;
 	for (;;)
 	{
 		/* Check for user-requested abort or sleep */
@@ -1196,9 +1193,9 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 		 * reach the end of the relation. This algorithm is from Jeff Vitter's
 		 * paper (see more info in commands/analyze.c).
 		 */
-		if (numrows < targrows)
+		if (sstats->sampled_tuples < targrows)
 		{
-			rows[numrows++] = heap_form_tuple(tupDesc, values, nulls);
+			rows[sstats->sampled_tuples++] = heap_form_tuple(tupDesc, values, nulls);
 		}
 		else
 		{
@@ -1208,7 +1205,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 			 * not-yet-incremented value of totalrows as t.
 			 */
 			if (rowstoskip < 0)
-				rowstoskip = reservoir_get_next_S(&rstate, *totalrows, targrows);
+				rowstoskip = reservoir_get_next_S(&rstate, sstats->total_live_tuples, targrows);
 
 			if (rowstoskip <= 0)
 			{
@@ -1226,7 +1223,7 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 			rowstoskip -= 1;
 		}
 
-		*totalrows += 1;
+		sstats->total_live_tuples += 1;
 	}
 
 	/* Remove error callback. */
@@ -1240,14 +1237,5 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 	pfree(values);
 	pfree(nulls);
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": file contains %.0f rows; "
-					"%d rows in sample",
-					RelationGetRelationName(onerel),
-					*totalrows, numrows)));
-
-	return numrows;
+	return sstats->sampled_tuples;
 }
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index fc65d81e21..eddfef21e3 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -498,10 +498,8 @@ static void process_query_params(ExprContext *econtext,
 								 FmgrInfo *param_flinfo,
 								 List *param_exprs,
 								 const char **param_values);
-static int	postgresAcquireSampleRowsFunc(Relation relation, int elevel,
-										  HeapTuple *rows, int targrows,
-										  double *totalrows,
-										  double *totaldeadrows);
+static int	postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows,
+										  int targrows, AcquireSampleStats * sstats);
 static void analyze_row_processor(PGresult *res, int row,
 								  PgFdwAnalyzeState *astate);
 static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch);
@@ -5056,7 +5054,7 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample)
  * which must have at least targrows entries.
  * The actual number of rows selected is returned as the function result.
  * We also count the total number of rows in the table and return it into
- * *totalrows.  Note that *totaldeadrows is always set to 0.
+ * sstats->total_live_tuples.
  *
  * Note that the returned list of rows is not always in order by physical
  * position in the table.  Therefore, correlation estimates derived later
@@ -5064,10 +5062,8 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample)
  * currently (the planner only pays attention to correlation for indexscans).
  */
 static int
-postgresAcquireSampleRowsFunc(Relation relation, int elevel,
-							  HeapTuple *rows, int targrows,
-							  double *totalrows,
-							  double *totaldeadrows)
+postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows,
+							  int targrows, AcquireSampleStats * sstats)
 {
 	PgFdwAnalyzeState astate;
 	ForeignTable *table;
@@ -5345,26 +5341,17 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 
 	ReleaseConnection(conn);
 
-	/* We assume that we have no dead tuple. */
-	*totaldeadrows = 0.0;
-
 	/*
 	 * Without sampling, we've retrieved all living tuples from foreign
 	 * server, so report that as totalrows.  Otherwise use the reltuples
 	 * estimate we got from the remote side.
 	 */
 	if (method == ANALYZE_SAMPLE_OFF)
-		*totalrows = astate.samplerows;
+		sstats->total_live_tuples = astate.samplerows;
 	else
-		*totalrows = reltuples;
+		sstats->total_live_tuples = reltuples;
 
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": table contains %.0f rows, %d rows in sample",
-					RelationGetRelationName(relation),
-					*totalrows, astate.numrows)));
+	sstats->sampled_tuples = astate.numrows;
 
 	return astate.numrows;
 }
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index d492772aac..7469226313 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -87,13 +87,12 @@ static void compute_index_stats(Relation onerel, double totalrows,
 								MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
-static int	acquire_sample_rows(Relation onerel, int elevel,
-								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
+static int	acquire_sample_rows(Relation onerel, HeapTuple *rows,
+								int targrows, AcquireSampleStats * sstats);
 static int	compare_rows(const void *a, const void *b, void *arg);
 static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
 										  HeapTuple *rows, int targrows,
-										  double *totalrows, double *totaldeadrows);
+										  AcquireSampleStats * sstats);
 static void update_attstats(Oid relid, bool inh,
 							int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -296,8 +295,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	int			targrows,
 				numrows,
 				minrows;
-	double		totalrows,
-				totaldeadrows;
+	AcquireSampleStats sstats;
 	HeapTuple  *rows;
 	PGRUsage	ru0;
 	TimestampTz starttime = 0;
@@ -524,14 +522,16 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	pgstat_progress_update_param(PROGRESS_ANALYZE_PHASE,
 								 inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
 								 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
+
+	memset(&sstats, 0, sizeof(AcquireSampleStats));
+
 	if (inh)
 		numrows = acquire_inherited_sample_rows(onerel, elevel,
 												rows, targrows,
-												&totalrows, &totaldeadrows);
+												&sstats);
 	else
-		numrows = (*acquirefunc) (onerel, elevel,
-								  rows, targrows,
-								  &totalrows, &totaldeadrows);
+		numrows = (*acquirefunc) (onerel, rows,
+								  targrows, &sstats);
 
 	/*
 	 * Compute the statistics.  Temporary results during the calculations for
@@ -562,7 +562,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 			stats->compute_stats(stats,
 								 std_fetch_func,
 								 numrows,
-								 totalrows);
+								 sstats.total_live_tuples);
 
 			/*
 			 * If the appropriate flavor of the n_distinct option is
@@ -582,7 +582,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		}
 
 		if (nindexes > 0)
-			compute_index_stats(onerel, totalrows,
+			compute_index_stats(onerel, sstats.total_live_tuples,
 								indexdata, nindexes,
 								rows, numrows,
 								col_context);
@@ -607,7 +607,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		}
 
 		/* Build extended statistics (if there are any). */
-		BuildRelationExtStatistics(onerel, inh, totalrows, numrows, rows,
+		BuildRelationExtStatistics(onerel, inh, sstats.total_live_tuples, numrows, rows,
 								   attr_cnt, vacattrstats);
 	}
 
@@ -641,7 +641,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		CommandCounterIncrement();
 		vac_update_relstats(onerel,
 							relpages,
-							totalrows,
+							sstats.total_live_tuples,
 							relallvisible,
 							hasindex,
 							InvalidTransactionId,
@@ -655,7 +655,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 			AnlIndexData *thisdata = &indexdata[ind];
 			double		totalindexrows;
 
-			totalindexrows = ceil(thisdata->tupleFract * totalrows);
+			totalindexrows = ceil(thisdata->tupleFract * sstats.total_live_tuples);
 			vac_update_relstats(Irel[ind],
 								RelationGetNumberOfBlocks(Irel[ind]),
 								totalindexrows,
@@ -674,7 +674,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 		 * in their pg_class entries except for reltuples and relhasindex.
 		 */
 		CommandCounterIncrement();
-		vac_update_relstats(onerel, -1, totalrows,
+		vac_update_relstats(onerel, -1, sstats.total_live_tuples,
 							0, hasindex, InvalidTransactionId,
 							InvalidMultiXactId,
 							NULL, NULL,
@@ -691,8 +691,8 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	 * columns; otherwise, there is still work for auto-analyze to do.
 	 */
 	if (!inh)
-		pgstat_report_analyze(onerel, totalrows, totaldeadrows,
-							  (va_cols == NIL));
+		pgstat_report_analyze(onerel, sstats.total_live_tuples,
+							  sstats.total_dead_tuples, (va_cols == NIL));
 	else if (onerel->rd_rel->relkind == RELKIND_PARTITIONED_TABLE)
 		pgstat_report_analyze(onerel, 0, 0, (va_cols == NIL));
 
@@ -810,6 +810,26 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							 get_database_name(MyDatabaseId),
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel));
+
+			/*
+			 * Sampling from file_fdw and postgres_fdw won't report pages
+			 * stats nor live_tuples and dead_tuples. Just display tuples
+			 * sampled and total tuples estimate in this case.
+			 */
+			if (sstats.scanned_pages == 0)
+			{
+				appendStringInfo(&buf, _("tuples: %d tuples in samples, %.0f estimated total tuples\n"),
+								 sstats.sampled_tuples, sstats.total_live_tuples);
+			}
+			else
+			{
+				appendStringInfo(&buf, _("pages: %u of %u scanned\n"),
+								 sstats.scanned_pages, sstats.rel_pages);
+				appendStringInfo(&buf, _("tuples: %.0f live tuples, %.0f are dead; %d tuples in samples, %.0f estimated total tuples\n"),
+								 sstats.live_tuples, sstats.dead_tuples,
+								 sstats.sampled_tuples, sstats.total_live_tuples);
+			}
+
 			if (track_io_timing)
 			{
 				double		read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
@@ -1184,17 +1204,13 @@ block_sampling_read_stream_next(ReadStream *stream,
  * density near the start of the table.
  */
 static int
-acquire_sample_rows(Relation onerel, int elevel,
-					HeapTuple *rows, int targrows,
-					double *totalrows, double *totaldeadrows)
+acquire_sample_rows(Relation onerel, HeapTuple *rows,
+					int targrows, AcquireSampleStats * sstats)
 {
 	int			numrows = 0;	/* # rows now in reservoir */
 	double		samplerows = 0; /* total # rows collected */
-	double		liverows = 0;	/* # live rows seen */
-	double		deadrows = 0;	/* # dead rows seen */
 	double		rowstoskip = -1;	/* -1 means not set yet */
 	uint32		randseed;		/* Seed for block sampler(s) */
-	BlockNumber totalblocks;
 	TransactionId OldestXmin;
 	BlockSamplerData bs;
 	ReservoirStateData rstate;
@@ -1206,14 +1222,14 @@ acquire_sample_rows(Relation onerel, int elevel,
 
 	Assert(targrows > 0);
 
-	totalblocks = RelationGetNumberOfBlocks(onerel);
+	sstats->rel_pages = RelationGetNumberOfBlocks(onerel);
 
 	/* Need a cutoff xmin for HeapTupleSatisfiesVacuum */
 	OldestXmin = GetOldestNonRemovableTransactionId(onerel);
 
 	/* Prepare for sampling block numbers */
 	randseed = pg_prng_uint32(&pg_global_prng_state);
-	nblocks = BlockSampler_Init(&bs, totalblocks, targrows, randseed);
+	nblocks = BlockSampler_Init(&bs, sstats->rel_pages, targrows, randseed);
 
 	/* Report sampling block numbers */
 	pgstat_progress_update_param(PROGRESS_ANALYZE_BLOCKS_TOTAL,
@@ -1238,7 +1254,7 @@ acquire_sample_rows(Relation onerel, int elevel,
 	{
 		vacuum_delay_point();
 
-		while (table_scan_analyze_next_tuple(scan, OldestXmin, &liverows, &deadrows, slot))
+		while (table_scan_analyze_next_tuple(scan, OldestXmin, &sstats->live_tuples, &sstats->dead_tuples, slot))
 		{
 			/*
 			 * The first targrows sample rows are simply copied into the
@@ -1313,26 +1329,11 @@ acquire_sample_rows(Relation onerel, int elevel,
 	 */
 	if (bs.m > 0)
 	{
-		*totalrows = floor((liverows / bs.m) * totalblocks + 0.5);
-		*totaldeadrows = floor((deadrows / bs.m) * totalblocks + 0.5);
+		sstats->total_live_tuples = floor((sstats->live_tuples / bs.m) * sstats->rel_pages + 0.5);
+		sstats->total_dead_tuples = floor((sstats->dead_tuples / bs.m) * sstats->rel_pages + 0.5);
 	}
-	else
-	{
-		*totalrows = 0.0;
-		*totaldeadrows = 0.0;
-	}
-
-	/*
-	 * Emit some interesting relation info
-	 */
-	ereport(elevel,
-			(errmsg("\"%s\": scanned %d of %u pages, "
-					"containing %.0f live rows and %.0f dead rows; "
-					"%d rows in sample, %.0f estimated total rows",
-					RelationGetRelationName(onerel),
-					bs.m, totalblocks,
-					liverows, deadrows,
-					numrows, *totalrows)));
+	sstats->scanned_pages = bs.m;
+	sstats->sampled_tuples = numrows;
 
 	return numrows;
 }
@@ -1373,7 +1374,7 @@ compare_rows(const void *a, const void *b, void *arg)
 static int
 acquire_inherited_sample_rows(Relation onerel, int elevel,
 							  HeapTuple *rows, int targrows,
-							  double *totalrows, double *totaldeadrows)
+							  AcquireSampleStats * sstats)
 {
 	List	   *tableOIDs;
 	Relation   *rels;
@@ -1386,10 +1387,6 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 	ListCell   *lc;
 	bool		has_child;
 
-	/* Initialize output parameters to zero now, in case we exit early */
-	*totalrows = 0;
-	*totaldeadrows = 0;
-
 	/*
 	 * Find all members of inheritance set.  We only need AccessShareLock on
 	 * the children.
@@ -1559,13 +1556,13 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 			if (childtargrows > 0)
 			{
 				int			childrows;
-				double		trows,
-							tdrows;
+				AcquireSampleStats childsstats;
+
+				memset(&childsstats, 0, sizeof(AcquireSampleStats));
 
 				/* Fetch a random sample of the child's rows */
-				childrows = (*acquirefunc) (childrel, elevel,
-											rows + numrows, childtargrows,
-											&trows, &tdrows);
+				childrows = (*acquirefunc) (childrel, rows + numrows,
+											childtargrows, &childsstats);
 
 				/* We may need to convert from child's rowtype to parent's */
 				if (childrows > 0 &&
@@ -1594,8 +1591,8 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 
 				/* And add to counts */
 				numrows += childrows;
-				*totalrows += trows;
-				*totaldeadrows += tdrows;
+				sstats->total_live_tuples += childsstats.total_live_tuples;
+				sstats->total_dead_tuples += childsstats.total_dead_tuples;
 			}
 		}
 
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index fcde3876b2..b27571f92a 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -19,6 +19,18 @@
 /* To avoid including explain.h here, reference ExplainState thus: */
 struct ExplainState;
 
+typedef struct AcquireSampleStats
+{
+	BlockNumber rel_pages;		/* total number of pages */
+	BlockNumber scanned_pages;	/* # pages examined */
+	double		live_tuples;	/* # live tuples found in scanned pages */
+	double		dead_tuples;	/* # dead tuples found in scanned pages */
+
+	int			sampled_tuples; /* # sampled tuples */
+	double		total_live_tuples;	/* # estimated live tuples */
+	double		total_dead_tuples;	/* # estimated dead tuples */
+}			AcquireSampleStats;
+
 
 /*
  * Callback function signatures --- see fdwhandler.sgml for more info.
@@ -148,10 +160,8 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
 typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
 											  struct ExplainState *es);
 
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-									  HeapTuple *rows, int targrows,
-									  double *totalrows,
-									  double *totaldeadrows);
+typedef int (*AcquireSampleRowsFunc) (Relation relation, HeapTuple *rows,
+									  int targrows, AcquireSampleStats * sstats);
 
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 											  AcquireSampleRowsFunc *func,
-- 
2.39.3 (Apple Git-146)

