From 6d84255845e9f945f96a30e787befe93fb162695 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 12:07:37 -0500
Subject: [PATCH v17 5/7] Add system view tracking IO ops per backend type

Add pg_stat_buffers, a system view which tracks the number of IO
operations (allocs, writes, fsyncs, and extends) done through each IO
path (e.g. shared buffers, local buffers, unbuffered IO) by each type of
backend.

Some of these should always be zero. For example, checkpointer does not
use a BufferAccessStrategy (currently), so the "strategy" IO path for
checkpointer will be 0 for all IO operations (alloc, write, fsync, and
extend). All possible combinations of IO Path and IO Op are enumerated
in the view but not all are populated or even possible at this point.

All backends increment a counter in their PgBackendStatus when
performing an IO operation. On exit, backends send these stats to the
stats collector to be persisted.

When the pg_stat_buffers view is queried, one backend will sum live
backends' stats with saved stats from exited backends and subtract saved
reset stats, returning the total.

Each row of the view is stats for a particular backend type for a
particular IO path (e.g. shared buffer accesses by checkpointer) and
each column in the view is the total number of IO operations done (e.g.
writes).
So a cell in the view would be, for example, the number of shared
buffers written by checkpointer since the last stats reset.

Discussion: https://www.postgresql.org/message-id/flat/20210415235954.qcypb4urtovzkat5%40alap3.anarazel.de#724d5cce4bcb587f9167b80a5824bc5c
---
 doc/src/sgml/monitoring.sgml                | 110 +++++++++++++-
 src/backend/catalog/system_views.sql        |  11 ++
 src/backend/postmaster/pgstat.c             |  13 ++
 src/backend/utils/activity/backend_status.c |  19 ++-
 src/backend/utils/adt/pgstatfuncs.c         | 154 ++++++++++++++++++++
 src/include/catalog/pg_proc.dat             |   9 ++
 src/include/pgstat.h                        |   1 +
 src/include/utils/backend_status.h          |   5 +
 src/test/regress/expected/rules.out         |   8 +
 9 files changed, 327 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index bda3eef309..b16952d439 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -435,6 +435,15 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
      </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_buffers</structname><indexterm><primary>pg_stat_buffers</primary></indexterm></entry>
+      <entry>A row for each IO path for each backend type showing
+      statistics about backend IO operations. See
+       <link linkend="monitoring-pg-stat-buffers-view">
+       <structname>pg_stat_buffers</structname></link> for details.
+     </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_wal</structname><indexterm><primary>pg_stat_wal</primary></indexterm></entry>
       <entry>One row only, showing statistics about WAL activity. See
@@ -3613,6 +3622,101 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
 
  </sect2>
 
+ <sect2 id="monitoring-pg-stat-buffers-view">
+  <title><structname>pg_stat_buffers</structname></title>
+
+  <indexterm>
+   <primary>pg_stat_buffers</primary>
+  </indexterm>
+
+  <para>
+   The <structname>pg_stat_buffers</structname> view has a row for each backend
+   type for each possible IO path, containing global data for the cluster for
+   that backend and IO path.
+  </para>
+
+  <table id="pg-stat-buffers-view" xreflabel="pg_stat_buffers">
+   <title><structname>pg_stat_buffers</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>backend_type</structfield> <type>text</type>
+      </para>
+      <para>
+       Type of backend (e.g. background worker, autovacuum worker).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>io_path</structfield> <type>text</type>
+      </para>
+      <para>
+       IO path taken (e.g. shared buffers, direct).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>alloc</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers allocated.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>extend</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers extended.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>fsync</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers fsynced.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>write</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers written.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Time at which these statistics were last reset.
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ </sect2>
+
  <sect2 id="monitoring-pg-stat-wal-view">
    <title><structname>pg_stat_wal</structname></title>
 
@@ -5213,8 +5317,10 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
         all the counters shown in
         the <structname>pg_stat_bgwriter</structname>
         view, <literal>archiver</literal> to reset all the counters shown in
-        the <structname>pg_stat_archiver</structname> view or <literal>wal</literal>
-        to reset all the counters shown in the <structname>pg_stat_wal</structname> view.
+        the <structname>pg_stat_archiver</structname> view, <literal>wal</literal>
+        to reset all the counters shown in the <structname>pg_stat_wal</structname> view, 
+        or <literal>buffers</literal> to reset all the counters shown in the
+        <structname>pg_stat_buffers</structname> view.
        </para>
        <para>
         This function is restricted to superusers by default, but other users
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 61b515cdb8..e214d23056 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1076,6 +1076,17 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_buffers AS
+SELECT
+       b.backend_type,
+       b.io_path,
+       b.alloc,
+       b.extend,
+       b.fsync,
+       b.write,
+       b.stats_reset
+FROM pg_stat_get_buffers() b;
+
 CREATE VIEW pg_stat_wal AS
     SELECT
         w.wal_records,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index c40b375b9a..9b041cb1b9 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -2925,6 +2925,19 @@ pgstat_twophase_postabort(TransactionId xid, uint16 info,
 		rec->tuples_inserted + rec->tuples_updated;
 }
 
+/*
+ *	Support function for SQL-callable pgstat* functions. Returns a pointer to
+ *	the PgStat_BackendIOPathOps structure tracking IO op statistics for both
+ *	exited backends and reset arithmetic.
+ */
+PgStat_BackendIOPathOps *
+pgstat_fetch_exited_backend_buffers(void)
+{
+	backend_read_statsfile();
+
+	return &globalStats.buffers;
+}
+
 
 /* ----------
  * pgstat_fetch_stat_dbentry() -
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index 9e9ca3e5a6..f1997441bc 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -50,7 +50,7 @@ int			pgstat_track_activity_query_size = 1024;
 PgBackendStatus *MyBEEntry = NULL;
 
 
-static PgBackendStatus *BackendStatusArray = NULL;
+PgBackendStatus *BackendStatusArray = NULL;
 static char *BackendAppnameBuffer = NULL;
 static char *BackendClientHostnameBuffer = NULL;
 static char *BackendActivityBuffer = NULL;
@@ -236,6 +236,23 @@ CreateSharedBackendStatus(void)
 #endif
 }
 
+const char *
+GetIOPathDesc(IOPath io_path)
+{
+	switch (io_path)
+	{
+		case IOPATH_DIRECT:
+			return "direct";
+		case IOPATH_LOCAL:
+			return "local";
+		case IOPATH_SHARED:
+			return "shared";
+		case IOPATH_STRATEGY:
+			return "strategy";
+	}
+	return "unknown IO path";
+}
+
 /*
  * Initialize pgstats backend activity state, and set up our on-proc-exit
  * hook.  Called from InitPostgres and AuxiliaryProcessMain. For auxiliary
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index f529c1561a..9fd7a8cdb9 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1796,6 +1796,160 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc);
 }
 
+/*
+* When adding a new column to the pg_stat_buffers view, add a new enum
+* value here above COLUMN_LENGTH.
+*/
+enum
+{
+	COLUMN_BACKEND_TYPE,
+	COLUMN_IO_PATH,
+	COLUMN_ALLOCS,
+	COLUMN_EXTENDS,
+	COLUMN_FSYNCS,
+	COLUMN_WRITES,
+	COLUMN_RESET_TIME,
+	COLUMN_LENGTH,
+};
+
+/*
+ * Helper function to get the correct row in the pg_stat_buffers view.
+ */
+static inline Datum *
+get_pg_stat_buffers_row(Datum all_values[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][COLUMN_LENGTH],
+		BackendType backend_type, IOPath io_path)
+{
+	/*
+	 * Caller must pass in a valid BackendType, as there are no rows for
+	 * B_INVALID. Subtract 1 to arrive at the correct row.
+	 */
+	Assert(backend_type > B_INVALID);
+	return all_values[backend_type - 1][io_path];
+}
+
+Datum
+pg_stat_get_buffers(PG_FUNCTION_ARGS)
+{
+	PgStat_BackendIOPathOps *backend_io_path_ops;
+	PgBackendStatus *beentry;
+	Datum		reset_time;
+
+	ReturnSetInfo *rsinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	Datum		all_values[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][COLUMN_LENGTH];
+	bool		all_nulls[BACKEND_NUM_TYPES][IOPATH_NUM_TYPES][COLUMN_LENGTH];
+
+	rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+	tupstore = tuplestore_begin_heap((bool) (rsinfo->allowedModes & SFRM_Materialize_Random),
+			false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+	MemoryContextSwitchTo(oldcontext);
+
+	memset(all_values, 0, sizeof(all_values));
+	memset(all_nulls, 0, sizeof(all_nulls));
+
+	 /* Loop through all live backends and count their IO Ops for each IO Path */
+	beentry = BackendStatusArray;
+
+	for (int i = 0; i < MaxBackends + NUM_AUXPROCTYPES; i++, beentry++)
+	{
+		IOOps	   *io_ops;
+
+		/* Don't count dead backends. They will be added below */
+		Assert(beentry->st_backendType >= B_INVALID);
+		if (beentry->st_procpid == 0 || beentry->st_backendType == B_INVALID)
+			continue;
+
+		io_ops = beentry->io_path_stats;
+
+		for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+		{
+			Datum *row = get_pg_stat_buffers_row(all_values, beentry->st_backendType, io_path);
+
+			/*
+			 * COLUMN_RESET_TIME, COLUMN_BACKEND_TYPE, and COLUMN_IO_PATH will
+			 * all be set when looping through exited backends array
+			 */
+			row[COLUMN_ALLOCS] += pg_atomic_read_u64(&io_ops->allocs);
+			row[COLUMN_EXTENDS] += pg_atomic_read_u64(&io_ops->extends);
+			row[COLUMN_FSYNCS] += pg_atomic_read_u64(&io_ops->fsyncs);
+			row[COLUMN_WRITES] += pg_atomic_read_u64(&io_ops->writes);
+			io_ops++;
+		}
+	}
+
+	/* Add stats from all exited backends */
+	backend_io_path_ops = pgstat_fetch_exited_backend_buffers();
+
+	reset_time = TimestampTzGetDatum(backend_io_path_ops->stat_reset_timestamp);
+
+	for (int backend_type_idx = 0; backend_type_idx < BACKEND_NUM_TYPES; backend_type_idx++)
+	{
+		PgStatIOOps *io_ops = backend_io_path_ops->ops[backend_type_idx].io_path_ops;
+		PgStatIOOps *resets = backend_io_path_ops->resets[backend_type_idx].io_path_ops;
+
+		Datum		backend_type_desc =
+			CStringGetTextDatum(GetBackendTypeDesc(backend_type_idx + 1));
+
+		for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+		{
+			/*
+			 * all_values does not include rows for the B_INVALID BackendType
+			 * and get_pg_stat_buffers_row() expects a valid BackendType, so
+			 * add 1 to the backend_type_idx to ensure the correct rows are
+			 * returned for the desired BackendType.
+			 */
+			Datum *row = get_pg_stat_buffers_row(all_values, backend_type_idx + 1, io_path);
+
+			row[COLUMN_BACKEND_TYPE] = backend_type_desc;
+			row[COLUMN_IO_PATH] = CStringGetTextDatum(GetIOPathDesc(io_path));
+			row[COLUMN_RESET_TIME] = reset_time;
+			row[COLUMN_ALLOCS] += io_ops->allocs - resets->allocs;
+			row[COLUMN_EXTENDS] += io_ops->extends - resets->extends;
+			row[COLUMN_FSYNCS] += io_ops->fsyncs - resets->fsyncs;
+			row[COLUMN_WRITES] += io_ops->writes - resets->writes;
+			io_ops++;
+			resets++;
+		}
+	}
+
+	for (int backend_type_idx = 0; backend_type_idx < BACKEND_NUM_TYPES; backend_type_idx++)
+	{
+		for (int io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+		{
+			Datum	   *values = all_values[backend_type_idx][io_path];
+			bool	   *nulls = all_nulls[backend_type_idx][io_path];
+
+			tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+		}
+	}
+
+	return (Datum) 0;
+}
+
 /*
  * Returns statistics of WAL activity
  */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 79d787cd26..32253383ba 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5654,6 +5654,15 @@
   proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
   prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
 
+{ oid => '8459', descr => 'statistics: counts of all IO operations done to all IO paths by each type of backend.',
+  proname => 'pg_stat_get_buffers', provolatile => 's', proisstrict => 'f',
+  prorows => '52', proretset => 't',
+  proparallel => 'r', prorettype => 'record', proargtypes => '',
+  proallargtypes => '{text,text,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_path,alloc,extend,fsync,write,stats_reset}',
+  prosrc => 'pg_stat_get_buffers' },
+
 { oid => '1136', descr => 'statistics: information about WAL activity',
   proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 2496d7e071..9e1c635108 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1278,6 +1278,7 @@ extern void pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src);
  * generate the pgstat* views.
  * ----------
  */
+extern PgStat_BackendIOPathOps *pgstat_fetch_exited_backend_buffers(void);
 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index acb4a85eef..4dbf32d48e 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -317,6 +317,7 @@ extern PGDLLIMPORT int pgstat_track_activity_query_size;
  * ----------
  */
 extern PGDLLIMPORT PgBackendStatus *MyBEEntry;
+extern PGDLLIMPORT PgBackendStatus *BackendStatusArray;
 
 
 /* ----------
@@ -332,6 +333,10 @@ extern void CreateSharedBackendStatus(void);
  * ----------
  */
 
+/* Utility functions */
+extern const char *GetIOPathDesc(IOPath io_path);
+
+
 /* Initialization functions */
 extern void pgstat_beinit(void);
 extern void pgstat_bestart(void);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index b58b062b10..5869ce442f 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1828,6 +1828,14 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints
     pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync,
     pg_stat_get_buf_alloc() AS buffers_alloc,
     pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
+pg_stat_buffers| SELECT b.backend_type,
+    b.io_path,
+    b.alloc,
+    b.extend,
+    b.fsync,
+    b.write,
+    b.stats_reset
+   FROM pg_stat_get_buffers() b(backend_type, io_path, alloc, extend, fsync, write, stats_reset);
 pg_stat_database| SELECT d.oid AS datid,
     d.datname,
         CASE
-- 
2.32.0

