From f972ea87270feaed464a74fb6541ac04b4fc7d98 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 11:39:48 -0500
Subject: [PATCH v17 4/7] Add "buffers" to pgstat_reset_shared_counters

Backends count IO operations for various IO paths in their PgBackendStatus.
Upon exit, they send these counts to the stats collector. Prior to this commit,
these IO Ops stats would have been reset when the target was "bgwriter".

With this commit, the "bgwriter" target no longer resets the IO operations
stats; they are reset with the new "buffers" target instead.
---
 doc/src/sgml/monitoring.sgml                |  2 +-
 src/backend/postmaster/pgstat.c             | 83 +++++++++++++++++++--
 src/backend/utils/activity/backend_status.c | 29 +++++++
 src/include/pgstat.h                        |  8 +-
 src/include/utils/backend_status.h          |  2 +
 5 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 62f2a3332b..bda3eef309 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -3604,7 +3604,7 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
        <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
       </para>
       <para>
-       Time at which these statistics were last reset
+       Time at which these statistics were last reset.
       </para></entry>
      </row>
     </tbody>
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 05097fc7bd..c40b375b9a 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -1512,6 +1512,35 @@ pgstat_reset_counters(void)
 	pgstat_send(&msg, sizeof(msg));
 }
 
+/*
+ * Send one reset message per BackendType, each carrying the summed current IO
+ * operation counters of that type's live backends, so that they can be
+ * deducted from future totals. msg's header and m_resettarget must be preset.
+ */
+static void
+pgstat_send_buffers_reset(PgStat_MsgResetsharedcounter *msg)
+{
+	PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+
+	memset(ops, 0, sizeof(ops));
+	pgstat_report_live_backend_io_path_ops(ops);
+
+	/*
+	 * Send one message per BackendType. The ops array has no slot for
+	 * B_INVALID, so index i holds the counters for BackendType i + 1; send
+	 * the real BackendType value so the receiver knows unambiguously which
+	 * type the message is for. The same msg buffer is reused, with only
+	 * backend_type and iop overwritten on each iteration.
+	 */
+	for (int backend_type_idx = 0; backend_type_idx < BACKEND_NUM_TYPES; backend_type_idx++)
+	{
+		msg->m_backend_resets.backend_type = backend_type_idx + 1;
+		memcpy(&msg->m_backend_resets.iop, &ops[backend_type_idx],
+				sizeof(msg->m_backend_resets.iop));
+		pgstat_send(msg, sizeof(PgStat_MsgResetsharedcounter));
+	}
+}
+
 /* ----------
  * pgstat_reset_shared_counters() -
  *
@@ -1529,7 +1558,14 @@ pgstat_reset_shared_counters(const char *target)
 	if (pgStatSock == PGINVALID_SOCKET)
 		return;
 
-	if (strcmp(target, "archiver") == 0)
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
+	if (strcmp(target, "buffers") == 0)
+	{
+		msg.m_resettarget = RESET_BUFFERS;
+		pgstat_send_buffers_reset(&msg);
+		return;
+	}
+	else if (strcmp(target, "archiver") == 0)
 		msg.m_resettarget = RESET_ARCHIVER;
 	else if (strcmp(target, "bgwriter") == 0)
 		msg.m_resettarget = RESET_BGWRITER;
@@ -1539,9 +1575,10 @@ pgstat_reset_shared_counters(const char *target)
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("unrecognized reset target: \"%s\"", target),
-				 errhint("Target must be \"archiver\", \"bgwriter\", or \"wal\".")));
+				 errhint(
+					 "Target must be \"archiver\", \"bgwriter\", \"buffers\", or \"wal\".")));
+
 
-	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RESETSHAREDCOUNTER);
 	pgstat_send(&msg, sizeof(msg));
 }
 
@@ -4418,6 +4455,7 @@ pgstat_read_statsfiles(Oid onlydb, bool permanent, bool deep)
 	 */
 	ts = GetCurrentTimestamp();
 	globalStats.bgwriter.stat_reset_timestamp = ts;
+	globalStats.buffers.stat_reset_timestamp = ts;
 	archiverStats.stat_reset_timestamp = ts;
 	walStats.stat_reset_timestamp = ts;
 
@@ -5583,10 +5621,45 @@ pgstat_recv_resetsharedcounter(PgStat_MsgResetsharedcounter *msg, int len)
 {
 	if (msg->m_resettarget == RESET_BGWRITER)
 	{
-		/* Reset the global, bgwriter and checkpointer statistics for the cluster. */
-		memset(&globalStats, 0, sizeof(globalStats));
+		/*
+		 * Reset the global bgwriter and checkpointer statistics for the
+		 * cluster.
+		 */
+		memset(&globalStats.checkpointer, 0, sizeof(globalStats.checkpointer));
+		memset(&globalStats.bgwriter, 0, sizeof(globalStats.bgwriter));
 		globalStats.bgwriter.stat_reset_timestamp = GetCurrentTimestamp();
 	}
+	else if (msg->m_resettarget == RESET_BUFFERS)
+	{
+		/*
+		 * Because the stats collector cannot write to live backends'
+		 * PgBackendStatuses, it maintains an array of "resets". The reset
+		 * message contains the current values of these counters for live
+		 * backends. The stats collector saves these in its "resets" array,
+		 * then zeroes out the exited backends' saved IO op counters. This is
+		 * required to calculate an accurate total for each IO op counter post
+		 * reset.
+		 */
+		BackendType backend_type = msg->m_backend_resets.backend_type;
+
+		/*
+		 * Though globalStats.buffers only needs to be reset once, doing so
+		 * for every message is less brittle and the extra cost is irrelevant
+		 * given how often stats are reset.
+		 */
+		memset(&globalStats.buffers.ops, 0, sizeof(globalStats.buffers.ops));
+		globalStats.buffers.stat_reset_timestamp = GetCurrentTimestamp();
+
+		/*
+		 * Subtract 1 from backend_type as the sender sent a valid BackendType
+		 * but the resets array does not contain an entry for B_INVALID
+		 * BackendType.
+		 */
+		Assert(backend_type > B_INVALID);
+		memcpy(&globalStats.buffers.resets[backend_type - 1],
+				&msg->m_backend_resets.iop.io_path_ops,
+				sizeof(msg->m_backend_resets.iop.io_path_ops));
+	}
 	else if (msg->m_resettarget == RESET_ARCHIVER)
 	{
 		/* Reset the archiver statistics for the cluster. */
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index 413cc605f8..9e9ca3e5a6 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -630,6 +630,35 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 	PGSTAT_END_WRITE_ACTIVITY(beentry);
 }
 
+/*
+ * Walk BackendStatusArray and add each live backend's IO operation counters
+ * into the backend_io_path_ops element for its BackendType (index is
+ * BackendType - 1). The array is not zeroed here; the caller must do that.
+ */
+void
+pgstat_report_live_backend_io_path_ops(PgStatIOPathOps *backend_io_path_ops)
+{
+	PgBackendStatus *beentry = BackendStatusArray;
+
+	/*
+	 * Scan every entry. NOTE(review): read without locking -- confirm OK.
+	 */
+	for (int i = 0; i < MaxBackends + NUM_AUXPROCTYPES; i++, beentry++)
+	{
+		/* Skip dead entries (st_procpid == 0) and B_INVALID backends. */
+		Assert(beentry->st_backendType >= B_INVALID);
+		if (beentry->st_procpid == 0 || beentry->st_backendType == B_INVALID)
+			continue;
+
+		/*
+		 * The array has no slot for B_INVALID, so BackendType - 1 is the
+		 * index of this backend's type.
+		 */
+		pgstat_sum_io_path_ops(backend_io_path_ops[beentry->st_backendType - 1].io_path_ops,
+							   (IOOps *) beentry->io_path_stats);
+	}
+}
+
 /* --------
  * pgstat_report_query_id() -
  *
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index f99be84db6..2496d7e071 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -142,6 +142,7 @@ typedef enum PgStat_Shared_Reset_Target
 {
 	RESET_ARCHIVER,
 	RESET_BGWRITER,
+	RESET_BUFFERS,
 	RESET_WAL
 } PgStat_Shared_Reset_Target;
 
@@ -357,7 +358,8 @@ typedef struct PgStatIOPathOps
 
 /*
  * Sent by a backend to the stats collector to report all IO Ops for all IO
- * Paths for a given type of a backend. This will happen when the backend exits.
+ * Paths for a given type of backend. This happens when a backend exits or
+ * when stats are reset.
  */
 typedef struct PgStat_MsgIOPathOps
 {
@@ -377,9 +379,12 @@ typedef struct PgStat_MsgIOPathOps
  */
 typedef struct PgStat_BackendIOPathOps
 {
+	TimestampTz stat_reset_timestamp;
 	PgStatIOPathOps ops[BACKEND_NUM_TYPES];
+	PgStatIOPathOps resets[BACKEND_NUM_TYPES];
 } PgStat_BackendIOPathOps;
 
+
 /* ----------
  * PgStat_MsgResetcounter		Sent by the backend to tell the collector
  *								to reset counters
@@ -400,6 +405,7 @@ typedef struct PgStat_MsgResetsharedcounter
 {
 	PgStat_MsgHdr m_hdr;
 	PgStat_Shared_Reset_Target m_resettarget;
+	PgStat_MsgIOPathOps m_backend_resets;
 } PgStat_MsgResetsharedcounter;
 
 /* ----------
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 2e5e949453..acb4a85eef 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -339,6 +339,7 @@ extern void pgstat_bestart(void);
 extern void pgstat_clear_backend_activity_snapshot(void);
 
 /* Activity reporting functions */
+typedef struct PgStatIOPathOps PgStatIOPathOps;
 
 static inline void
 pgstat_inc_ioop(IOOp io_op, IOPath io_path)
@@ -366,6 +367,7 @@ pgstat_inc_ioop(IOOp io_op, IOPath io_path)
 	}
 }
 extern void pgstat_report_activity(BackendState state, const char *cmd_str);
+extern void pgstat_report_live_backend_io_path_ops(PgStatIOPathOps *backend_io_path_ops);
 extern void pgstat_report_query_id(uint64 query_id, bool force);
 extern void pgstat_report_tempfile(size_t filesize);
 extern void pgstat_report_appname(const char *appname);
-- 
2.32.0

