From 939f7f01239b56865e212c24cd63437a46a344b1 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 11:16:55 -0500
Subject: [PATCH v21 4/8] Send IO operations to stats collector

On exit, backends send the IO operations they have done on all IO Paths
to the stats collector. The stats collector adds these counts to its
existing counts stored in a global data structure it maintains and
persists.

PgStatIOOpCounters contains the same information as backend_status.h's
IOOpCounters; however, IOOpCounters' members must be atomics, whereas the
stats collector has no such requirement.

Suggested by Andres Freund

Author: Melanie Plageman <melanieplageman@gmail.com>
Reviewed-by: Justin Pryzby <pryzby@telsasoft.com>
Discussion: https://www.postgresql.org/message-id/flat/20200124195226.lth52iydq2n2uilq%40alap3.anarazel.de
---
 src/backend/postmaster/pgstat.c    | 100 ++++++++++++++++++++++++++++-
 src/include/miscadmin.h            |   2 +
 src/include/pgstat.h               |  56 ++++++++++++++++
 src/include/utils/backend_status.h |  37 +++++++++++
 4 files changed, 194 insertions(+), 1 deletion(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 0646f53098..5eaf8b6ee7 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -126,7 +126,7 @@ char	   *pgstat_stat_filename = NULL;
 char	   *pgstat_stat_tmpname = NULL;
 
 /*
- * BgWriter and WAL global statistics counters.
+ * BgWriter, Checkpointer, and WAL global statistics counters.
  * Stored directly in a stats message structure so they can be sent
  * without needing to copy things around.  We assume these init to zeroes.
  */
@@ -369,6 +369,7 @@ static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
 static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len);
+static void pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len);
 static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
 static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
@@ -3152,6 +3153,14 @@ pgstat_shutdown_hook(int code, Datum arg)
 {
 	Assert(!pgstat_is_shutdown);
 
+	/*
+	 * Only need to send stats on IOOps for IOPaths when a process exits. Users
+	 * requiring IOOps for both live and exited backends can read from live
+	 * backends' PgBackendStatuses and sum this with totals from exited
+	 * backends persisted by the stats collector.
+	 */
+	pgstat_send_buffers();
+
 	/*
 	 * If we got as far as discovering our own database ID, we can report what
 	 * we did to the collector.  Otherwise, we'd be sending an invalid
@@ -3301,6 +3310,46 @@ pgstat_send_bgwriter(void)
 	MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats));
 }
 
+/*
+ * At backend exit (see pgstat_shutdown_hook()), send this backend's IO
+ * operation counts to the collector; a no-op if unidentified or no IO was done.
+ */
+void
+pgstat_send_buffers(void)
+{
+	PgStatIOOpCounters *io_path_ops;	/* aliases the counter array inside msg */
+	PgStat_MsgIOPathOps msg;
+
+	PgBackendStatus *beentry = MyBEEntry;
+	PgStat_Counter sum = 0;		/* total of all counters, for the zero check */
+
+	if (!beentry || beentry->st_backendType == B_INVALID)
+		return;					/* no status entry or no valid backend type */
+
+	memset(&msg, 0, sizeof(msg));	/* the summing helper adds, so start at 0 */
+	msg.backend_type = beentry->st_backendType;
+
+	io_path_ops = msg.iop.io_path_ops;
+	pgstat_sum_io_path_ops(io_path_ops, (IOOpCounters *)
+			&beentry->io_path_stats);
+
+	/* If no IO was done, don't bother sending anything to the stats collector. */
+	for (int i = 0; i < IOPATH_NUM_TYPES; i++)	/* every IOPath, every IOOp */
+	{
+		sum += io_path_ops[i].allocs;
+		sum += io_path_ops[i].extends;
+		sum += io_path_ops[i].fsyncs;
+		sum += io_path_ops[i].writes;
+	}
+
+	if (sum == 0)
+		return;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_IO_PATH_OPS);
+	pgstat_send(&msg, sizeof(msg));
+}
+
+
 /* ----------
  * pgstat_send_checkpointer() -
  *
@@ -3483,6 +3532,29 @@ pgstat_send_subscription_purge(PgStat_MsgSubscriptionPurge *msg)
 	pgstat_send(msg, len);
 }
 
+/*
+ * Add the IO operation counts in src to those in dest, for every IOPath
+ * (e.g. shared, local).  Both arguments point to arrays of IOPATH_NUM_TYPES
+ * elements; src holds atomic counters (read with pg_atomic_read_u64()) and
+ * dest holds the plain-counter pgstats equivalent.
+ * Note that this adds and doesn't set, so the destination stats structure
+ * should be zeroed out by the caller initially.
+ * Commonly used to transfer a backend's IOOp stats to a pgstats structure.
+ */
+void
+pgstat_sum_io_path_ops(PgStatIOOpCounters *dest, IOOpCounters *src)
+{
+	for (int i = 0; i < IOPATH_NUM_TYPES; i++)
+	{
+		dest->allocs += pg_atomic_read_u64(&src->allocs);
+		dest->extends += pg_atomic_read_u64(&src->extends);
+		dest->fsyncs += pg_atomic_read_u64(&src->fsyncs);
+		dest->writes += pg_atomic_read_u64(&src->writes);
+		dest++;					/* step both arrays to the next IOPath */
+		src++;
+	}
+}
+
 /* ----------
  * PgstatCollectorMain() -
  *
@@ -3692,6 +3764,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_checkpointer(&msg.msg_checkpointer, len);
 					break;
 
+				case PGSTAT_MTYPE_IO_PATH_OPS:
+					pgstat_recv_io_path_ops(&msg.msg_io_path_ops, len);
+					break;
+
 				case PGSTAT_MTYPE_WAL:
 					pgstat_recv_wal(&msg.msg_wal, len);
 					break;
@@ -5813,6 +5889,28 @@ pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len)
 	globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend;
 }
 
+static void
+pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len)
+{
+	PgStatIOOpCounters *src_io_path_ops;	/* counts carried by the message */
+	PgStatIOOpCounters *dest_io_path_ops;	/* globalStats totals for its type */
+
+	src_io_path_ops = msg->iop.io_path_ops;
+	dest_io_path_ops =			/* slot range is only checked by Assert() */
+		globalStats.buffers.ops[backend_type_get_idx(msg->backend_type)].io_path_ops;
+
+	for (int i = 0; i < IOPATH_NUM_TYPES; i++)	/* accumulate per IOPath */
+	{
+		PgStatIOOpCounters *src = &src_io_path_ops[i];
+		PgStatIOOpCounters *dest = &dest_io_path_ops[i];
+
+		dest->allocs += src->allocs;
+		dest->extends += src->extends;
+		dest->fsyncs += src->fsyncs;
+		dest->writes += src->writes;
+	}
+}
+
 /* ----------
  * pgstat_recv_wal() -
  *
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 94c6135e93..77c89134c2 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -338,6 +338,8 @@ typedef enum BackendType
 	B_WAL_WRITER,
 } BackendType;
 
+#define BACKEND_NUM_TYPES B_WAL_WRITER	/* must name the last BackendType enumerator */
+
 extern BackendType MyBackendType;
 
 extern const char *GetBackendTypeDesc(BackendType backendType);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index e10d20222a..431f273d23 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -73,6 +73,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_ARCHIVER,
 	PGSTAT_MTYPE_BGWRITER,
 	PGSTAT_MTYPE_CHECKPOINTER,
+	PGSTAT_MTYPE_IO_PATH_OPS,
 	PGSTAT_MTYPE_WAL,
 	PGSTAT_MTYPE_SLRU,
 	PGSTAT_MTYPE_FUNCSTAT,
@@ -335,6 +336,48 @@ typedef struct PgStat_MsgDropdb
 } PgStat_MsgDropdb;
 
 
+/*
+ * Non-atomic counters, one per type of IOOp, as kept by the stats collector.
+ */
+typedef struct PgStatIOOpCounters
+{
+	PgStat_Counter allocs;
+	PgStat_Counter extends;
+	PgStat_Counter fsyncs;
+	PgStat_Counter writes;
+} PgStatIOOpCounters;
+
+/*
+ * Structure for counting all IOOps on all types of IOPaths.
+ */
+typedef struct PgStatIOPathOps
+{
+	PgStatIOOpCounters io_path_ops[IOPATH_NUM_TYPES];	/* one set per IOPath */
+} PgStatIOPathOps;
+
+/*
+ * Sent by a backend to the stats collector to report all IOOps for all IOPaths
+ * for a given type of backend. This will happen when the backend exits.
+ */
+typedef struct PgStat_MsgIOPathOps
+{
+	PgStat_MsgHdr m_hdr;		/* filled in with pgstat_setheader() */
+
+	BackendType backend_type;	/* sender's BackendType */
+	PgStatIOPathOps iop;		/* sender's IOOp counts, per IOPath */
+} PgStat_MsgIOPathOps;
+
+/*
+ * Structure used by stats collector to accumulate exited backends' IOOps for
+ * all IOPaths, one PgStatIOPathOps per valid BackendType (index it with
+ * backend_type_get_idx()).  NOTE(review): the "resets" member and reset-time
+ * live-backend stats described here previously are not part of this struct.
+ */
+typedef struct PgStat_BackendIOPathOps
+{
+	PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+} PgStat_BackendIOPathOps;
+
 /* ----------
  * PgStat_MsgResetcounter		Sent by the backend to tell the collector
  *								to reset counters
@@ -756,6 +799,7 @@ typedef union PgStat_Msg
 	PgStat_MsgArchiver msg_archiver;
 	PgStat_MsgBgWriter msg_bgwriter;
 	PgStat_MsgCheckpointer msg_checkpointer;
+	PgStat_MsgIOPathOps msg_io_path_ops;
 	PgStat_MsgWal msg_wal;
 	PgStat_MsgSLRU msg_slru;
 	PgStat_MsgFuncstat msg_funcstat;
@@ -939,6 +983,7 @@ typedef struct PgStat_GlobalStats
 
 	PgStat_CheckpointerStats checkpointer;
 	PgStat_BgWriterStats bgwriter;
+	PgStat_BackendIOPathOps buffers;
 } PgStat_GlobalStats;
 
 /*
@@ -1215,8 +1260,19 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 
 extern void pgstat_send_archiver(const char *xlog, bool failed);
 extern void pgstat_send_bgwriter(void);
+/*
+ * While some processes send some types of statistics to the collector at
+ * regular intervals (e.g. CheckpointerMain() calling
+ * pgstat_send_checkpointer()), IO operations stats are only sent by
+ * pgstat_send_buffers() when a process exits (in pgstat_shutdown_hook()). IO
+ * operations stats from live backends can be read from their PgBackendStatuses
+ * and, if desired, summed with totals from exited backends persisted by the
+ * stats collector.
+ */
+extern void pgstat_send_buffers(void);
 extern void pgstat_send_checkpointer(void);
 extern void pgstat_send_wal(bool force);
+extern void pgstat_sum_io_path_ops(PgStatIOOpCounters *dest, IOOpCounters *src);
 
 /* ----------
  * Support functions for the SQL-callable functions to
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 950b7396a5..3de1e7c8d3 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -331,6 +331,43 @@ extern void CreateSharedBackendStatus(void);
  * ----------
  */
 
+/* Utility functions */
+
+/*
+ * Map a valid BackendType to its 0-based slot in an array that keeps one entry
+ * per valid BackendType and none for B_INVALID (so as not to waste the 0th
+ * spot), i.e. return backend_type - 1.  The range is only checked by Assert().
+ * Inverse of idx_get_backend_type().
+ */
+static inline int backend_type_get_idx(BackendType backend_type)
+{
+	/*
+	 * backend_type must be one of the valid backend types. If caller is
+	 * maintaining backend information in an array that includes B_INVALID,
+	 * this function is unnecessary.
+	 */
+	Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES);
+	return backend_type - 1;
+}
+
+/*
+ * Map a 0-based slot in an array of per-valid-BackendType information back to
+ * its BackendType, i.e. return idx + 1, adjusting for not maintaining a spot
+ * for B_INVALID BackendType.  Inverse of backend_type_get_idx().
+ */
+static inline BackendType idx_get_backend_type(int idx)
+{
+	int backend_type = idx + 1;	/* shift past the omitted B_INVALID slot */
+	/*
+	 * If the array includes a spot for B_INVALID BackendType this function is
+	 * not required.
+	 */
+	Assert(backend_type > B_INVALID && backend_type <= BACKEND_NUM_TYPES);
+	return backend_type;
+}
+
+extern const char *GetIOPathDesc(IOPath io_path);
+
 /* Initialization functions */
 extern void pgstat_beinit(void);
 extern void pgstat_bestart(void);
-- 
2.32.0

