From b569f8b0a3a886cf759e9ac8d2376eadb45b6c29 Mon Sep 17 00:00:00 2001
From: Sami Imseih <simseih@amazon.com>
Date: Mon, 10 Nov 2025 00:03:41 -0600
Subject: [PATCH v2 1/2] pgstat: support custom serialization files and
 callbacks

Allow custom statistics kinds to serialize and deserialize extra
per-entry data. This supports kinds with variable auxiliary data
that cannot fit in shared memory.

To allow this, three callbacks are provided: one to serialize the extra
data when writing an entry, one to deserialize it when reading an entry,
and one to clean up resources once the stats file has been written, read
or discarded. The last one is required for crash recovery scenarios.
---
 src/backend/utils/activity/pgstat.c | 61 ++++++++++++++++++++++++++++-
 src/include/utils/pgstat_internal.h | 33 ++++++++++++++++
 2 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/src/backend/utils/activity/pgstat.c b/src/backend/utils/activity/pgstat.c
index 8713c7a0483..117728db016 100644
--- a/src/backend/utils/activity/pgstat.c
+++ b/src/backend/utils/activity/pgstat.c
@@ -523,6 +523,7 @@ pgstat_discard_stats(void)
 
 	/* NB: this needs to be done even in single user mode */
 
+	/* First, clean up the core stats file */
 	ret = unlink(PGSTAT_STAT_PERMANENT_FILENAME);
 	if (ret != 0)
 	{
@@ -544,6 +545,15 @@ pgstat_discard_stats(void)
 								 PGSTAT_STAT_PERMANENT_FILENAME)));
 	}
 
+	/* Now, clean up every custom kind's extra stats files */
+	for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
+	{
+		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+		if (kind_info && kind_info->end_extra_stats)
+			kind_info->end_extra_stats(STATS_DISCARD);
+	}
+
 	/*
 	 * Reset stats contents. This will set reset timestamps of fixed-numbered
 	 * stats to the current time (no variable stats exist).
@@ -1463,6 +1473,7 @@ pgstat_get_kind_info(PgStat_Kind kind)
 void
 pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
 {
+	bool		has_extra = false;
 	uint32		idx = kind - PGSTAT_KIND_CUSTOM_MIN;
 
 	if (kind_info->name == NULL || strlen(kind_info->name) == 0)
@@ -1525,6 +1536,26 @@ pgstat_register_kind(PgStat_Kind kind, const PgStat_KindInfo *kind_info)
 					 errdetail("Existing cumulative statistics with ID %u has the same name.", existing_kind)));
 	}
 
+	/*
+	 * Ensure that to_serialized_extra_stats, from_serialized_extra_stats and
+	 * end_extra_stats are registered together or not at all.
+	 */
+	has_extra =
+		kind_info->to_serialized_extra_stats ||
+		kind_info->from_serialized_extra_stats ||
+		kind_info->end_extra_stats;
+
+	if (has_extra &&
+		(!kind_info->to_serialized_extra_stats ||
+		 !kind_info->from_serialized_extra_stats ||
+		 !kind_info->end_extra_stats))
+	{
+		ereport(ERROR,
+				(errmsg("could not register custom cumulative statistics \"%s\" with ID %u",
+						kind_info->name, kind),
+				 errdetail("Callbacks to_serialized_extra_stats, from_serialized_extra_stats, and end_extra_stats must all be provided together.")));
+	}
+
 	/* Register it */
 	pgstat_kind_custom_infos[idx] = kind_info;
 	ereport(LOG,
@@ -1702,6 +1733,9 @@ pgstat_write_statsfile(void)
 		pgstat_write_chunk(fpout,
 						   pgstat_get_entry_data(ps->key.kind, shstats),
 						   pgstat_get_entry_len(ps->key.kind));
+
+		if (pgstat_is_kind_custom(ps->key.kind) && kind_info->to_serialized_extra_stats)
+			kind_info->to_serialized_extra_stats(&ps->key, shstats, fpout);
 	}
 	dshash_seq_term(&hstat);
 
@@ -1734,6 +1768,15 @@ pgstat_write_statsfile(void)
 		/* durable_rename already emitted log message */
 		unlink(tmpfile);
 	}
+
+	/* Now, allow each custom kind to finalize the writes of its extra files */
+	for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
+	{
+		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+		if (kind_info && kind_info->end_extra_stats)
+			kind_info->end_extra_stats(STATS_WRITE);
+	}
 }
 
 /* helper for pgstat_read_statsfile() */
@@ -1871,6 +1914,7 @@ pgstat_read_statsfile(void)
 					PgStat_HashKey key;
 					PgStatShared_HashEntry *p;
 					PgStatShared_Common *header;
+					const PgStat_KindInfo *kind_info = NULL;
 
 					CHECK_FOR_INTERRUPTS();
 
@@ -1891,7 +1935,8 @@ pgstat_read_statsfile(void)
 							goto error;
 						}
 
-						if (!pgstat_get_kind_info(key.kind))
+						kind_info = pgstat_get_kind_info(key.kind);
+						if (!kind_info)
 						{
 							elog(WARNING, "could not find information of kind for entry %u/%u/%" PRIu64 " of type %c",
 								 key.kind, key.dboid,
@@ -1902,7 +1947,6 @@ pgstat_read_statsfile(void)
 					else
 					{
 						/* stats entry identified by name on disk (e.g. slots) */
-						const PgStat_KindInfo *kind_info = NULL;
 						PgStat_Kind kind;
 						NameData	name;
 
@@ -1996,6 +2040,9 @@ pgstat_read_statsfile(void)
 						goto error;
 					}
 
+					if (pgstat_is_kind_custom(key.kind) && kind_info->from_serialized_extra_stats)
+						kind_info->from_serialized_extra_stats(&key, header, fpin);
+
 					break;
 				}
 			case PGSTAT_FILE_ENTRY_END:
@@ -2019,11 +2066,21 @@ pgstat_read_statsfile(void)
 	}
 
 done:
+	/* First, clean up the core stats file */
 	FreeFile(fpin);
 
 	elog(DEBUG2, "removing permanent stats file \"%s\"", statfile);
 	unlink(statfile);
 
+	/* Now, clean up every custom kind's extra stats files */
+	for (PgStat_Kind kind = PGSTAT_KIND_CUSTOM_MIN; kind <= PGSTAT_KIND_CUSTOM_MAX; kind++)
+	{
+		const PgStat_KindInfo *kind_info = pgstat_get_kind_info(kind);
+
+		if (kind_info && kind_info->end_extra_stats)
+			kind_info->end_extra_stats(STATS_READ);
+	}
+
 	return;
 
 error:
diff --git a/src/include/utils/pgstat_internal.h b/src/include/utils/pgstat_internal.h
index ca1ba6420ca..25bba6e98d4 100644
--- a/src/include/utils/pgstat_internal.h
+++ b/src/include/utils/pgstat_internal.h
@@ -63,6 +63,20 @@ typedef struct PgStat_HashKey
 								 * identifier. */
 } PgStat_HashKey;
 
+/*
+ * Identifies whether the stats file is being written, read or discarded.
+ *
+ * This value is passed to the end_extra_stats callback so that custom
+ * statistics kinds that create extra statistics files can determine the
+ * current operation and perform any necessary file cleanup.
+ */
+typedef enum PgStat_StatsFileOp
+{
+	STATS_WRITE,				/* passed at the end of pgstat_write_statsfile() */
+	STATS_READ,					/* passed at the end of pgstat_read_statsfile() */
+	STATS_DISCARD,				/* passed from pgstat_discard_stats() */
+}			PgStat_StatsFileOp;
+
 /*
  * PgStat_HashKey should not have any padding.  Checking that the structure
  * size matches with the sum of each field is a check simple enough to
@@ -303,6 +317,25 @@ typedef struct PgStat_KindInfo
 									   const PgStatShared_Common *header, NameData *name);
 	bool		(*from_serialized_name) (const NameData *name, PgStat_HashKey *key);
 
+	/*
+	 * Optional callbacks for kinds that write additional per-entry data to
+	 * the stats file.  If any of these callbacks are provided, all three must
+	 * be provided to ensure that the reader consumes exactly the data written
+	 * by the writer.
+	 *
+	 * to_serialized_extra_stats: write extra data for an entry.
+	 *
+	 * from_serialized_extra_stats: read the extra data for an entry.
+	 *
+	 * end_extra_stats: invoked once per operation (write, read, discard)
+	 * after the core stats file has been handled, for finalization/cleanup.
+	 */
+	void		(*to_serialized_extra_stats) (const PgStat_HashKey *key,
+											  const PgStatShared_Common *header, FILE *statfile);
+	void		(*from_serialized_extra_stats) (const PgStat_HashKey *key,
+												const PgStatShared_Common *header, FILE *statfile);
+	void		(*end_extra_stats) (PgStat_StatsFileOp status);
+
 	/*
 	 * For fixed-numbered statistics: Initialize shared memory state.
 	 *
-- 
2.43.0

