From 2df4381fd685bd114450cc6afb5e3bea25d9f67d Mon Sep 17 00:00:00 2001
From: Anthonin Bonnefoy <anthonin.bonnefoy@datadoghq.com>
Date: Thu, 25 Jul 2024 09:34:55 +0200
Subject: Pass StringInfo logbuf to AcquireSampleFunc

Currently, row sampling functions emit log with interesting informations
like scanned pages or live tuples within the function itself. This
prevents the possibility to output those informations during ANALYZE
VERBOSE in a consistent way (a single log line emitted at the end of the
analysis).

To fix this, this patch modifies AcquireSampleRowsFunc to accept a
StringInfo that will store log output from the function. This allows to
control when and where the sampling functions logs should be by
do_analyze_rel.

elevel parameter has been removed from the AcquireSampleRowsFunc's
parameters as it is now unused.

Also, rows were replaced by tuples in logs for consistency.
---
 contrib/file_fdw/file_fdw.c         | 21 ++++----
 contrib/postgres_fdw/postgres_fdw.c | 23 ++++----
 doc/src/sgml/fdwhandler.sgml        |  4 +-
 src/backend/commands/analyze.c      | 81 +++++++++++++++--------------
 src/include/foreign/fdwapi.h        |  7 ++-
 5 files changed, 67 insertions(+), 69 deletions(-)

diff --git a/contrib/file_fdw/file_fdw.c b/contrib/file_fdw/file_fdw.c
index 249d82d3a05..20d96abce4e 100644
--- a/contrib/file_fdw/file_fdw.c
+++ b/contrib/file_fdw/file_fdw.c
@@ -160,9 +160,9 @@ static void estimate_size(PlannerInfo *root, RelOptInfo *baserel,
 static void estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
 						   FileFdwPlanState *fdw_private,
 						   Cost *startup_cost, Cost *total_cost);
-static int	file_acquire_sample_rows(Relation onerel, int elevel,
-									 HeapTuple *rows, int targrows,
-									 double *totalrows, double *totaldeadrows);
+static int	file_acquire_sample_rows(Relation onerel, HeapTuple *rows,
+									 int targrows, double *totalrows,
+									 double *totaldeadrows, StringInfo logbuf);
 
 
 /*
@@ -1120,9 +1120,9 @@ estimate_costs(PlannerInfo *root, RelOptInfo *baserel,
  * currently (the planner only pays attention to correlation for indexscans).
  */
 static int
-file_acquire_sample_rows(Relation onerel, int elevel,
-						 HeapTuple *rows, int targrows,
-						 double *totalrows, double *totaldeadrows)
+file_acquire_sample_rows(Relation onerel, HeapTuple *rows,
+						 int targrows, double *totalrows,
+						 double *totaldeadrows, StringInfo logbuf)
 {
 	int			numrows = 0;
 	double		rowstoskip = -1;	/* -1 means not set yet */
@@ -1241,13 +1241,10 @@ file_acquire_sample_rows(Relation onerel, int elevel,
 	pfree(nulls);
 
 	/*
-	 * Emit some interesting relation info
+	 * Add some interesting relation info to logbuf
 	 */
-	ereport(elevel,
-			(errmsg("\"%s\": file contains %.0f rows; "
-					"%d rows in sample",
-					RelationGetRelationName(onerel),
-					*totalrows, numrows)));
+	appendStringInfo(logbuf, "tuples: %.0f tuples; %d tuples in sample\n",
+					 *totalrows, numrows);
 
 	return numrows;
 }
diff --git a/contrib/postgres_fdw/postgres_fdw.c b/contrib/postgres_fdw/postgres_fdw.c
index fc65d81e217..54a65018474 100644
--- a/contrib/postgres_fdw/postgres_fdw.c
+++ b/contrib/postgres_fdw/postgres_fdw.c
@@ -498,10 +498,9 @@ static void process_query_params(ExprContext *econtext,
 								 FmgrInfo *param_flinfo,
 								 List *param_exprs,
 								 const char **param_values);
-static int	postgresAcquireSampleRowsFunc(Relation relation, int elevel,
-										  HeapTuple *rows, int targrows,
-										  double *totalrows,
-										  double *totaldeadrows);
+static int	postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows,
+										  int targrows, double *totalrows,
+										  double *totaldeadrows, StringInfo logbuf);
 static void analyze_row_processor(PGresult *res, int row,
 								  PgFdwAnalyzeState *astate);
 static void produce_tuple_asynchronously(AsyncRequest *areq, bool fetch);
@@ -5064,10 +5063,9 @@ postgresGetAnalyzeInfoForForeignTable(Relation relation, bool *can_tablesample)
  * currently (the planner only pays attention to correlation for indexscans).
  */
 static int
-postgresAcquireSampleRowsFunc(Relation relation, int elevel,
-							  HeapTuple *rows, int targrows,
-							  double *totalrows,
-							  double *totaldeadrows)
+postgresAcquireSampleRowsFunc(Relation relation, HeapTuple *rows,
+							  int targrows, double *totalrows,
+							  double *totaldeadrows, StringInfo logbuf)
 {
 	PgFdwAnalyzeState astate;
 	ForeignTable *table;
@@ -5359,12 +5357,11 @@ postgresAcquireSampleRowsFunc(Relation relation, int elevel,
 		*totalrows = reltuples;
 
 	/*
-	 * Emit some interesting relation info
+	 * Add some interesting relation info to logbuf
 	 */
-	ereport(elevel,
-			(errmsg("\"%s\": table contains %.0f rows, %d rows in sample",
-					RelationGetRelationName(relation),
-					*totalrows, astate.numrows)));
+	appendStringInfo(logbuf,
+					 "tuples: %.0f tuples; %d tuples in sample\n",
+					 *totalrows, astate.numrows);
 
 	return astate.numrows;
 }
diff --git a/doc/src/sgml/fdwhandler.sgml b/doc/src/sgml/fdwhandler.sgml
index b80320504d6..d758b904ee5 100644
--- a/doc/src/sgml/fdwhandler.sgml
+++ b/doc/src/sgml/fdwhandler.sgml
@@ -1393,11 +1393,11 @@ AnalyzeForeignTable(Relation relation,
 <programlisting>
 int
 AcquireSampleRowsFunc(Relation relation,
-                      int elevel,
                       HeapTuple *rows,
                       int targrows,
                       double *totalrows,
-                      double *totaldeadrows);
+                      double *totaldeadrows,
+                      StringInfo logbuf);
 </programlisting>
 
      A random sample of up to <parameter>targrows</parameter> rows should be collected
diff --git a/src/backend/commands/analyze.c b/src/backend/commands/analyze.c
index 8b25eb99fb8..def9090850d 100644
--- a/src/backend/commands/analyze.c
+++ b/src/backend/commands/analyze.c
@@ -87,13 +87,13 @@ static void compute_index_stats(Relation onerel, double totalrows,
 								MemoryContext col_context);
 static VacAttrStats *examine_attribute(Relation onerel, int attnum,
 									   Node *index_expr);
-static int	acquire_sample_rows(Relation onerel, int elevel,
-								HeapTuple *rows, int targrows,
-								double *totalrows, double *totaldeadrows);
+static int	acquire_sample_rows(Relation onerel, HeapTuple *rows,
+								int targrows, double *totalrows,
+								double *totaldeadrows, StringInfo logbuf);
 static int	compare_rows(const void *a, const void *b, void *arg);
-static int	acquire_inherited_sample_rows(Relation onerel, int elevel,
-										  HeapTuple *rows, int targrows,
-										  double *totalrows, double *totaldeadrows);
+static int	acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows,
+										  int targrows, double *totalrows,
+										  double *totaldeadrows, StringInfo logbuf);
 static void update_attstats(Oid relid, bool inh,
 							int natts, VacAttrStats **vacattrstats);
 static Datum std_fetch_func(VacAttrStatsP stats, int rownum, bool *isNull);
@@ -310,6 +310,7 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 	BufferUsage bufferusage;
 	PgStat_Counter startreadtime = 0;
 	PgStat_Counter startwritetime = 0;
+	StringInfoData logbuf;
 
 	verbose = (params->options & VACOPT_VERBOSE) != 0;
 	instrument = (verbose || (AmAutoVacuumWorkerProcess() &&
@@ -334,6 +335,9 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 										ALLOCSET_DEFAULT_SIZES);
 	caller_context = MemoryContextSwitchTo(anl_context);
 
+	/* Initialize log buffer */
+	initStringInfo(&logbuf);
+
 	/*
 	 * Switch to the table owner's userid, so that any index functions are run
 	 * as that user.  Also lock down security-restricted operations and
@@ -528,13 +532,11 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 								 inh ? PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS_INH :
 								 PROGRESS_ANALYZE_PHASE_ACQUIRE_SAMPLE_ROWS);
 	if (inh)
-		numrows = acquire_inherited_sample_rows(onerel, elevel,
-												rows, targrows,
-												&totalrows, &totaldeadrows);
+		numrows = acquire_inherited_sample_rows(onerel, rows, targrows,
+												&totalrows, &totaldeadrows, &logbuf);
 	else
-		numrows = (*acquirefunc) (onerel, elevel,
-								  rows, targrows,
-								  &totalrows, &totaldeadrows);
+		numrows = (*acquirefunc) (onerel, rows, targrows,
+								  &totalrows, &totaldeadrows, &logbuf);
 
 	/*
 	 * Compute the statistics.  Temporary results during the calculations for
@@ -808,6 +810,10 @@ do_analyze_rel(Relation onerel, VacuumParams *params,
 							 get_database_name(MyDatabaseId),
 							 get_namespace_name(RelationGetNamespace(onerel)),
 							 RelationGetRelationName(onerel));
+
+			/* Append logs from sampling function */
+			appendBinaryStringInfo(&buf, logbuf.data, logbuf.len);
+
 			if (track_io_timing)
 			{
 				double		read_ms = (double) (pgStatBlockReadTime - startreadtime) / 1000;
@@ -1182,9 +1188,9 @@ block_sampling_read_stream_next(ReadStream *stream,
  * density near the start of the table.
  */
 static int
-acquire_sample_rows(Relation onerel, int elevel,
-					HeapTuple *rows, int targrows,
-					double *totalrows, double *totaldeadrows)
+acquire_sample_rows(Relation onerel, HeapTuple *rows,
+					int targrows, double *totalrows,
+					double *totaldeadrows, StringInfo logbuf)
 {
 	int			numrows = 0;	/* # rows now in reservoir */
 	double		samplerows = 0; /* total # rows collected */
@@ -1321,16 +1327,12 @@ acquire_sample_rows(Relation onerel, int elevel,
 	}
 
 	/*
-	 * Emit some interesting relation info
+	 * Add interesting relation info to logbuf
 	 */
-	ereport(elevel,
-			(errmsg("\"%s\": scanned %d of %u pages, "
-					"containing %.0f live rows and %.0f dead rows; "
-					"%d rows in sample, %.0f estimated total rows",
-					RelationGetRelationName(onerel),
-					bs.m, totalblocks,
-					liverows, deadrows,
-					numrows, *totalrows)));
+	appendStringInfo(logbuf, _("pages: %u of %u scanned\n"),
+					 bs.m, totalblocks);
+	appendStringInfo(logbuf, _("tuples: %.0f live tuples, %.0f are dead; %d tuples in sample, %.0f estimated total tuples\n"),
+					 liverows, deadrows, numrows, *totalrows);
 
 	return numrows;
 }
@@ -1369,9 +1371,9 @@ compare_rows(const void *a, const void *b, void *arg)
  * children are foreign tables that don't support ANALYZE.
  */
 static int
-acquire_inherited_sample_rows(Relation onerel, int elevel,
-							  HeapTuple *rows, int targrows,
-							  double *totalrows, double *totaldeadrows)
+acquire_inherited_sample_rows(Relation onerel, HeapTuple *rows,
+							  int targrows, double *totalrows,
+							  double *totaldeadrows, StringInfo logbuf)
 {
 	List	   *tableOIDs;
 	Relation   *rels;
@@ -1407,10 +1409,10 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 		/* CCI because we already updated the pg_class row in this command */
 		CommandCounterIncrement();
 		SetRelationHasSubclass(RelationGetRelid(onerel), false);
-		ereport(elevel,
-				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables",
-						get_namespace_name(RelationGetNamespace(onerel)),
-						RelationGetRelationName(onerel))));
+		appendStringInfo(logbuf,
+						 "skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no child tables\n",
+						 get_namespace_name(RelationGetNamespace(onerel)),
+						 RelationGetRelationName(onerel));
 		return 0;
 	}
 
@@ -1505,10 +1507,10 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 	 */
 	if (!has_child)
 	{
-		ereport(elevel,
-				(errmsg("skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables",
-						get_namespace_name(RelationGetNamespace(onerel)),
-						RelationGetRelationName(onerel))));
+		appendStringInfo(logbuf,
+						 "skipping analyze of \"%s.%s\" inheritance tree --- this inheritance tree contains no analyzable child tables\n",
+						 get_namespace_name(RelationGetNamespace(onerel)),
+						 RelationGetRelationName(onerel));
 		return 0;
 	}
 
@@ -1560,10 +1562,13 @@ acquire_inherited_sample_rows(Relation onerel, int elevel,
 				double		trows,
 							tdrows;
 
+				appendStringInfo(logbuf, _("Sampling rows from child \"%s.%s\"\n"),
+								 get_namespace_name(RelationGetNamespace(childrel)),
+								 RelationGetRelationName(childrel));
 				/* Fetch a random sample of the child's rows */
-				childrows = (*acquirefunc) (childrel, elevel,
-											rows + numrows, childtargrows,
-											&trows, &tdrows);
+				childrows = (*acquirefunc) (childrel, rows + numrows,
+											childtargrows, &trows,
+											&tdrows, logbuf);
 
 				/* We may need to convert from child's rowtype to parent's */
 				if (childrows > 0 &&
diff --git a/src/include/foreign/fdwapi.h b/src/include/foreign/fdwapi.h
index fcde3876b28..b33a183b080 100644
--- a/src/include/foreign/fdwapi.h
+++ b/src/include/foreign/fdwapi.h
@@ -148,10 +148,9 @@ typedef void (*ExplainForeignModify_function) (ModifyTableState *mtstate,
 typedef void (*ExplainDirectModify_function) (ForeignScanState *node,
 											  struct ExplainState *es);
 
-typedef int (*AcquireSampleRowsFunc) (Relation relation, int elevel,
-									  HeapTuple *rows, int targrows,
-									  double *totalrows,
-									  double *totaldeadrows);
+typedef int (*AcquireSampleRowsFunc) (Relation relation, HeapTuple *rows,
+									  int targrows, double *totalrows,
+									  double *totaldeadrows, StringInfo logbuf);
 
 typedef bool (*AnalyzeForeignTable_function) (Relation relation,
 											  AcquireSampleRowsFunc *func,
-- 
2.39.3 (Apple Git-146)

