From b13037ff82a9871b37296eaccc39c7574d38d20f Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 11:16:55 -0500
Subject: [PATCH v17 3/7] Send IO operations to stats collector

On exit, backends send the IO operations they have done on all IO Paths
to the stats collector. The stats collector adds these counts to its
existing counts stored in a global data structure it maintains and
persists.

PgStatIOOps contains the same information as backend_status.h's IOOps;
however, IOOps' members must be atomics, whereas the stats collector has
no such requirement.
---
 src/backend/postmaster/pgstat.c | 103 +++++++++++++++++++++++++++++++-
 src/include/miscadmin.h         |   2 +
 src/include/pgstat.h            |  49 +++++++++++++++
 3 files changed, 151 insertions(+), 3 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 7264d2c727..05097fc7bd 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -126,9 +126,12 @@ char	   *pgstat_stat_filename = NULL;
 char	   *pgstat_stat_tmpname = NULL;
 
 /*
- * BgWriter and WAL global statistics counters.
- * Stored directly in a stats message structure so they can be sent
- * without needing to copy things around.  We assume these init to zeroes.
+ * BgWriter, Checkpointer, WAL, and I/O global statistics counters. The I/O
+ * statistics are the exception: IO op counts are tracked in PgBackendStatus
+ * while a backend is alive and are sent to the stats collector in a
+ * PgStat_MsgIOPathOps just before the backend exits.
+ * All others are stored directly in a stats message structure so they can be
+ * sent without needing to copy things around.  We assume these init to zeroes.
  */
 PgStat_MsgBgWriter PendingBgWriterStats;
 PgStat_MsgCheckpointer PendingCheckpointerStats;
@@ -369,6 +372,7 @@ static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
 static void pgstat_recv_archiver(PgStat_MsgArchiver *msg, int len);
 static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
 static void pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len);
+static void pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len);
 static void pgstat_recv_wal(PgStat_MsgWal *msg, int len);
 static void pgstat_recv_slru(PgStat_MsgSLRU *msg, int len);
 static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
@@ -3152,6 +3156,14 @@ pgstat_shutdown_hook(int code, Datum arg)
 {
 	Assert(!pgstat_is_shutdown);
 
+	/*
+	 * Only need to send stats on IO Ops for IO Paths when a process exits.
+	 * Users requiring IO Ops for both live and exited backends can read from
+	 * live backends' PgBackendStatus and sum this with totals from exited
+	 * backends persisted by the stats collector.
+	 */
+	pgstat_send_buffers();
+
 	/*
 	 * If we got as far as discovering our own database ID, we can report what
 	 * we did to the collector.  Otherwise, we'd be sending an invalid
@@ -3301,6 +3313,37 @@ pgstat_send_bgwriter(void)
 	MemSet(&PendingBgWriterStats, 0, sizeof(PendingBgWriterStats));
 }
 
+/*
+ * Send this backend's IO op counts for all IO Paths to the stats collector so
+ * that they may be persisted. Called from pgstat_shutdown_hook() on exit.
+ */
+void
+pgstat_send_buffers(void)
+{
+	PgStat_MsgIOPathOps msg;
+
+	PgBackendStatus *beentry = MyBEEntry;
+
+	/*
+	 * Nothing to send without a PgBackendStatus entry. Also, though some
+	 * backends of type B_INVALID (such as the single-user mode process) do
+	 * count IO operations, the collector's array of IO operations has no
+	 * spot for B_INVALID, so do not send those either.
+	 */
+	if (!beentry || beentry->st_backendType == B_INVALID)
+		return;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.backend_type = beentry->st_backendType;
+
+	pgstat_sum_io_path_ops(msg.iop.io_path_ops,
+						   (IOOps *) &beentry->io_path_stats);
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_IO_PATH_OPS);
+	pgstat_send(&msg, sizeof(msg));
+}
+
+
 /* ----------
  * pgstat_send_checkpointer() -
  *
@@ -3483,6 +3526,28 @@ pgstat_send_subscription_purge(PgStat_MsgSubscriptionPurge *msg)
 	pgstat_send(msg, len);
 }
 
+/*
+ * Helper function to add the IO Op counts of every IO Path (e.g. shared,
+ * local) in src to the matching entries of dest. Both src and dest must point
+ * to arrays of IOPATH_NUM_TYPES elements. Note that this adds and doesn't
+ * set, so the destination stats structure should be zeroed out by the caller
+ * initially. Commonly used to transfer a backend's atomic IOOps counters
+ * into the non-atomic pgstats structure.
+ */
+void
+pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src)
+{
+	for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+	{
+		dest->allocs += pg_atomic_read_u64(&src->allocs);
+		dest->extends += pg_atomic_read_u64(&src->extends);
+		dest->fsyncs += pg_atomic_read_u64(&src->fsyncs);
+		dest->writes += pg_atomic_read_u64(&src->writes);
+		dest++;
+		src++;
+	}
+}
+
 /* ----------
  * PgstatCollectorMain() -
  *
@@ -3692,6 +3757,10 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_checkpointer(&msg.msg_checkpointer, len);
 					break;
 
+				case PGSTAT_MTYPE_IO_PATH_OPS:
+					pgstat_recv_io_path_ops(&msg.msg_io_path_ops, len);
+					break;
+
 				case PGSTAT_MTYPE_WAL:
 					pgstat_recv_wal(&msg.msg_wal, len);
 					break;
@@ -5813,6 +5882,34 @@ pgstat_recv_checkpointer(PgStat_MsgCheckpointer *msg, int len)
 	globalStats.checkpointer.buf_fsync_backend += msg->m_buf_fsync_backend;
 }
 
+/*
+ * Add an exiting backend's IO Op counts to the totals for its BackendType.
+ */
+static void
+pgstat_recv_io_path_ops(PgStat_MsgIOPathOps *msg, int len)
+{
+	PgStatIOOps *src_io_path_ops;
+	PgStatIOOps *dest_io_path_ops;
+
+	/* ops[] has no entry for B_INVALID, so index it with BackendType - 1. */
+	Assert(msg->backend_type > B_INVALID &&
+		   msg->backend_type <= BACKEND_NUM_TYPES);
+
+	src_io_path_ops = msg->iop.io_path_ops;
+	dest_io_path_ops = globalStats.buffers.ops[msg->backend_type - 1].io_path_ops;
+
+	for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+	{
+		PgStatIOOps *src = &src_io_path_ops[io_path];
+		PgStatIOOps *dest = &dest_io_path_ops[io_path];
+
+		dest->allocs += src->allocs;
+		dest->extends += src->extends;
+		dest->fsyncs += src->fsyncs;
+		dest->writes += src->writes;
+	}
+}
+
 /* ----------
  * pgstat_recv_wal() -
  *
diff --git a/src/include/miscadmin.h b/src/include/miscadmin.h
index 90a3016065..662170c72e 100644
--- a/src/include/miscadmin.h
+++ b/src/include/miscadmin.h
@@ -338,6 +338,8 @@ typedef enum BackendType
 	B_LOGGER,
 } BackendType;
 
+#define BACKEND_NUM_TYPES B_LOGGER	/* B_LOGGER must remain last */
+
 extern BackendType MyBackendType;
 
 extern const char *GetBackendTypeDesc(BackendType backendType);
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 5b51b58e5a..f99be84db6 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -73,6 +73,7 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_ARCHIVER,
 	PGSTAT_MTYPE_BGWRITER,
 	PGSTAT_MTYPE_CHECKPOINTER,
+	PGSTAT_MTYPE_IO_PATH_OPS,
 	PGSTAT_MTYPE_WAL,
 	PGSTAT_MTYPE_SLRU,
 	PGSTAT_MTYPE_FUNCSTAT,
@@ -335,6 +336,50 @@ typedef struct PgStat_MsgDropdb
 } PgStat_MsgDropdb;
 
 
+/*
+ * Stats collector's count of each type of IO op; non-atomic analog of IOOps.
+ */
+typedef struct PgStatIOOps
+{
+	PgStat_Counter allocs;
+	PgStat_Counter extends;
+	PgStat_Counter fsyncs;
+	PgStat_Counter writes;
+} PgStatIOOps;
+
+/*
+ * IO Op counts for every type of IO Path; one array entry per IO Path.
+ */
+typedef struct PgStatIOPathOps
+{
+	PgStatIOOps io_path_ops[IOPATH_NUM_TYPES];
+} PgStatIOPathOps;
+
+/*
+ * Sent by a backend to the stats collector to report all IO Ops for all IO
+ * Paths for a given type of backend. This happens when the backend exits.
+ */
+typedef struct PgStat_MsgIOPathOps
+{
+	PgStat_MsgHdr m_hdr;
+
+	BackendType backend_type;	/* type of the sending backend */
+	PgStatIOPathOps iop;		/* its IO Op counts, per IO Path */
+} PgStat_MsgIOPathOps;
+
+/*
+ * Structure used by stats collector to keep track of all types of exited
+ * backends' IO Ops for all IO Paths. "ops" has one entry per BackendType
+ * other than B_INVALID, so be sure to subtract 1 from BackendType when
+ * indexing it.
+ * NOTE(review): earlier text also described a "resets" member populated by a
+ * reset message; no such member exists in this struct as of this patch.
+ */
+typedef struct PgStat_BackendIOPathOps
+{
+	PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+} PgStat_BackendIOPathOps;
+
 /* ----------
  * PgStat_MsgResetcounter		Sent by the backend to tell the collector
  *								to reset counters
@@ -756,6 +801,7 @@ typedef union PgStat_Msg
 	PgStat_MsgArchiver msg_archiver;
 	PgStat_MsgBgWriter msg_bgwriter;
 	PgStat_MsgCheckpointer msg_checkpointer;
+	PgStat_MsgIOPathOps msg_io_path_ops;
 	PgStat_MsgWal msg_wal;
 	PgStat_MsgSLRU msg_slru;
 	PgStat_MsgFuncstat msg_funcstat;
@@ -939,6 +985,7 @@ typedef struct PgStat_GlobalStats
 
 	PgStat_CheckpointerStats checkpointer;
 	PgStat_BgWriterStats bgwriter;
+	PgStat_BackendIOPathOps buffers;
 } PgStat_GlobalStats;
 
 /*
@@ -1215,8 +1262,10 @@ extern void pgstat_twophase_postabort(TransactionId xid, uint16 info,
 
 extern void pgstat_send_archiver(const char *xlog, bool failed);
 extern void pgstat_send_bgwriter(void);
+extern void pgstat_send_buffers(void);
 extern void pgstat_send_checkpointer(void);
 extern void pgstat_send_wal(bool force);
+extern void pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src);
 
 /* ----------
  * Support functions for the SQL-callable functions to
-- 
2.32.0

