From 61bfc251e3e06c08210b01e1447fe7431b9b7fe5 Mon Sep 17 00:00:00 2001
From: Melanie Plageman <melanieplageman@gmail.com>
Date: Wed, 24 Nov 2021 12:07:37 -0500
Subject: [PATCH v15 5/7] Add system view tracking IO ops per backend type

Add pg_stat_buffers, a system view which tracks the number of IO
operations (allocs, writes, fsyncs, and extends) done through each IO
path (e.g. shared buffers, local buffers, unbuffered IO) by each type of
backend.

Some of these should always be zero. For example, checkpointer does not
use a BufferAccessStrategy (currently), so the "strategy" IO path for
checkpointer will be 0 for all IO operations (alloc, write, fsync, and
extend). All possible combinations of IO Path and IO Op are enumerated
in the view but not all are populated or even possible at this point.

All backends increment a counter in their PgBackendStatus when
performing an IO operation. On exit, backends send these stats to the
stats collector to be persisted.

When the pg_stat_buffers view is queried, one backend will sum live
backends' stats with saved stats from exited backends and subtract saved
reset stats, returning the total.

Each row of the view shows stats for a particular backend type for a
particular IO path (e.g. shared buffer accesses by checkpointer) and
each column in the view is the total number of IO operations done (e.g.
writes). So a cell in the view would be, for example, the number of
shared buffers written by checkpointer since the last stats reset.

Discussion: https://www.postgresql.org/message-id/flat/20210415235954.qcypb4urtovzkat5%40alap3.anarazel.de#724d5cce4bcb587f9167b80a5824bc5c
---
 doc/src/sgml/monitoring.sgml                | 110 +++++++++++++-
 src/backend/catalog/system_views.sql        |  11 ++
 src/backend/postmaster/pgstat.c             |  13 ++
 src/backend/utils/activity/backend_status.c |  20 ++-
 src/backend/utils/adt/pgstatfuncs.c         | 151 ++++++++++++++++++++
 src/include/catalog/pg_proc.dat             |   9 ++
 src/include/pgstat.h                        |   1 +
 src/include/utils/backend_status.h          |   5 +
 src/test/regress/expected/rules.out         |   8 ++
 9 files changed, 325 insertions(+), 3 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index d4dd5d3623..56d2fd884f 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -435,6 +435,15 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
      </entry>
      </row>
 
+     <row>
+      <entry><structname>pg_stat_buffers</structname><indexterm><primary>pg_stat_buffers</primary></indexterm></entry>
+      <entry>A row for each IO path for each backend type showing
+      statistics about backend IO operations. See
+       <link linkend="monitoring-pg-stat-buffers-view">
+       <structname>pg_stat_buffers</structname></link> for details.
+     </entry>
+     </row>
+
      <row>
       <entry><structname>pg_stat_wal</structname><indexterm><primary>pg_stat_wal</primary></indexterm></entry>
       <entry>One row only, showing statistics about WAL activity. See
@@ -3482,6 +3491,101 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
 
  </sect2>
 
+ <sect2 id="monitoring-pg-stat-buffers-view">
+  <title><structname>pg_stat_buffers</structname></title>
+
+  <indexterm>
+   <primary>pg_stat_buffers</primary>
+  </indexterm>
+
+  <para>
+   The <structname>pg_stat_buffers</structname> view has a row for each backend
+   type for each possible IO path, containing global data for the cluster for
+   that backend type and IO path.
+  </para>
+
+  <table id="pg-stat-buffers-view" xreflabel="pg_stat_buffers">
+   <title><structname>pg_stat_buffers</structname> View</title>
+   <tgroup cols="1">
+    <thead>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       Column Type
+      </para>
+      <para>
+       Description
+      </para></entry>
+     </row>
+    </thead>
+    <tbody>
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>backend_type</structfield> <type>text</type>
+      </para>
+      <para>
+       Type of backend (e.g. background worker, autovacuum worker).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>io_path</structfield> <type>text</type>
+      </para>
+      <para>
+       IO path taken (e.g. <literal>shared</literal>, <literal>direct</literal>).
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>alloc</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers allocated.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>extend</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers extended.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>fsync</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers fsynced.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>write</structfield> <type>bigint</type>
+      </para>
+      <para>
+       Number of buffers written.
+      </para></entry>
+     </row>
+
+     <row>
+      <entry role="catalog_table_entry"><para role="column_definition">
+       <structfield>stats_reset</structfield> <type>timestamp with time zone</type>
+      </para>
+      <para>
+       Time at which these statistics were last reset.
+      </para></entry>
+     </row>
+    </tbody>
+   </tgroup>
+  </table>
+
+ </sect2>
+
  <sect2 id="monitoring-pg-stat-wal-view">
    <title><structname>pg_stat_wal</structname></title>
 
@@ -5082,8 +5186,10 @@ SELECT pid, wait_event_type, wait_event FROM pg_stat_activity WHERE wait_event i
         all the counters shown in
         the <structname>pg_stat_bgwriter</structname>
         view, <literal>archiver</literal> to reset all the counters shown in
-        the <structname>pg_stat_archiver</structname> view or <literal>wal</literal>
-        to reset all the counters shown in the <structname>pg_stat_wal</structname> view.
+        the <structname>pg_stat_archiver</structname> view, <literal>wal</literal>
+        to reset all the counters shown in the <structname>pg_stat_wal</structname> view,
+        or <literal>buffers</literal> to reset all the counters shown in the
+        <structname>pg_stat_buffers</structname> view.
        </para>
        <para>
         This function is restricted to superusers by default, but other users
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index eb560955cd..86ca35121b 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1076,6 +1076,17 @@ CREATE VIEW pg_stat_bgwriter AS
         pg_stat_get_buf_alloc() AS buffers_alloc,
         pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
 
+CREATE VIEW pg_stat_buffers AS
+    SELECT
+        b.backend_type,
+        b.io_path,
+        b.alloc,
+        b.extend,
+        b.fsync,
+        b.write,
+        b.stats_reset
+    FROM pg_stat_get_buffers() b;
+
 CREATE VIEW pg_stat_wal AS
     SELECT
         w.wal_records,
diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 0d18a4dc02..4bcd99bd6f 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -2794,6 +2794,19 @@ pgstat_twophase_postabort(TransactionId xid, uint16 info,
 		rec->tuples_inserted + rec->tuples_updated;
 }
 
+/*
+ *	Support function for SQL-callable pgstat* functions. Calls
+ *	backend_read_statsfile(), then returns a pointer to globalStats.buffers:
+ *	IO op counts saved from exited backends plus the reset baseline.
+ */
+PgStat_BackendIOPathOps *
+pgstat_fetch_exited_backend_buffers(void)
+{
+	backend_read_statsfile();
+
+	return &globalStats.buffers;
+}
+
 
 /* ----------
  * pgstat_fetch_stat_dbentry() -
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index 1617033e26..c3e5d23f99 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -50,7 +50,7 @@ int			pgstat_track_activity_query_size = 1024;
 PgBackendStatus *MyBEEntry = NULL;
 
 
-static PgBackendStatus *BackendStatusArray = NULL;
+PgBackendStatus *BackendStatusArray = NULL;
 static char *BackendAppnameBuffer = NULL;
 static char *BackendClientHostnameBuffer = NULL;
 static char *BackendActivityBuffer = NULL;
@@ -236,6 +236,24 @@ CreateSharedBackendStatus(void)
 #endif
 }
 
+/* Return the name of an IO path as displayed in pg_stat_buffers.io_path. */
+const char *
+GetIOPathDesc(IOPath io_path)
+{
+	switch (io_path)
+	{
+		case IOPATH_DIRECT:
+			return "direct";
+		case IOPATH_LOCAL:
+			return "local";
+		case IOPATH_SHARED:
+			return "shared";
+		case IOPATH_STRATEGY:
+			return "strategy";
+	}
+	return "unknown IO path";
+}
+
 /*
  * Initialize pgstats backend activity state, and set up our on-proc-exit
  * hook.  Called from InitPostgres and AuxiliaryProcessMain. For auxiliary
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index e64857e540..0c370fdce2 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -1796,6 +1796,157 @@ pg_stat_get_buf_alloc(PG_FUNCTION_ARGS)
 	PG_RETURN_INT64(pgstat_fetch_stat_bgwriter()->buf_alloc);
 }
 
+/*
+ * Columns of the pg_stat_buffers view, in output order. When adding a new
+ * column to the view, add a new enum value here above COLUMN_LENGTH.
+ */
+enum
+{
+	COLUMN_BACKEND_TYPE,
+	COLUMN_IO_PATH,
+	COLUMN_ALLOCS,
+	COLUMN_EXTENDS,
+	COLUMN_FSYNCS,
+	COLUMN_WRITES,
+	COLUMN_RESET_TIME,
+	COLUMN_LENGTH,
+};
+
+/* One row for every IO path of every BackendType except B_INVALID */
+#define NROWS ((BACKEND_NUM_TYPES - 1) * IOPATH_NUM_TYPES)
+
+/*
+ * Look up the pg_stat_buffers row for a backend type and IO path. The
+ * backend type is shifted down by one because B_INVALID gets no rows.
+ */
+static inline Datum *
+get_pg_stat_buffers_row(Datum all_values[NROWS][COLUMN_LENGTH],
+						BackendType backend_type, IOPath io_path)
+{
+	return all_values[(backend_type - 1) * IOPATH_NUM_TYPES + io_path];
+}
+
+/*
+ * Returns one row per backend type and IO path for pg_stat_buffers: the IO
+ * op counts of live backends plus those saved for exited backends, minus the
+ * counts saved at the last reset.
+ * NOTE(review): sums are kept as raw Datums; confirm int8 is pass-by-value.
+ */
+Datum
+pg_stat_get_buffers(PG_FUNCTION_ARGS)
+{
+	PgStat_BackendIOPathOps *backend_io_path_ops;
+	PgBackendStatus *beentry;
+	int			backend_type, io_path;
+	int			i;
+	Datum		reset_time;
+
+	ReturnSetInfo *rsinfo;
+	TupleDesc	tupdesc;
+	Tuplestorestate *tupstore;
+	MemoryContext per_query_ctx;
+	MemoryContext oldcontext;
+
+	Datum		all_values[NROWS][COLUMN_LENGTH];
+	bool		all_nulls[NROWS][COLUMN_LENGTH];
+
+	rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
+
+	/* check to see if caller supports us returning a tuplestore */
+	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("set-valued function called in context that cannot accept a set")));
+	if (!(rsinfo->allowedModes & SFRM_Materialize))
+		ereport(ERROR,
+				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+				 errmsg("materialize mode required, but it is not allowed in this context")));
+
+	/* Build a tuple descriptor for our result type */
+	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+		elog(ERROR, "return type must be a row type");
+
+	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
+	oldcontext = MemoryContextSwitchTo(per_query_ctx);
+
+	tupstore = tuplestore_begin_heap(true, false, work_mem);
+	rsinfo->returnMode = SFRM_Materialize;
+	rsinfo->setResult = tupstore;
+	rsinfo->setDesc = tupdesc;
+	MemoryContextSwitchTo(oldcontext);
+
+	memset(all_values, 0, sizeof(all_values));
+	memset(all_nulls, 0, sizeof(all_nulls));
+
+	/* Sum the IO Ops of every live backend, starting with status slot 0 */
+	beentry = BackendStatusArray;
+
+	for (i = 0; i < MaxBackends + NUM_AUXPROCTYPES; i++, beentry++)
+	{
+		IOOps	   *io_ops;
+
+		/* Don't count dead backends. They should already be counted */
+		if (beentry->st_procpid == 0)
+			continue;
+
+		io_ops = beentry->io_path_stats;
+
+		for (io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+		{
+			Datum *values = get_pg_stat_buffers_row(all_values, beentry->st_backendType, io_path);
+
+			/* Non-counter columns are filled in by the exited-backends loop */
+			values[COLUMN_ALLOCS] += pg_atomic_read_u64(&io_ops->allocs);
+			values[COLUMN_EXTENDS] += pg_atomic_read_u64(&io_ops->extends);
+			values[COLUMN_FSYNCS] += pg_atomic_read_u64(&io_ops->fsyncs);
+			values[COLUMN_WRITES] += pg_atomic_read_u64(&io_ops->writes);
+			io_ops++;
+		}
+	}
+
+	/* Add stats from all exited backends */
+	backend_io_path_ops = pgstat_fetch_exited_backend_buffers();
+
+	reset_time = TimestampTzGetDatum(backend_io_path_ops->stat_reset_timestamp);
+
+	/* 0 is not a valid BackendType */
+	for (backend_type = 1; backend_type < BACKEND_NUM_TYPES; backend_type++)
+	{
+		PgStatIOOps *io_ops = backend_io_path_ops->ops[backend_type].io_path_ops;
+		PgStatIOOps *resets = backend_io_path_ops->resets[backend_type].io_path_ops;
+
+		Datum		backend_type_desc = CStringGetTextDatum(GetBackendTypeDesc(backend_type));
+
+		for (io_path = 0; io_path < IOPATH_NUM_TYPES; io_path++)
+		{
+			Datum *values = get_pg_stat_buffers_row(all_values, backend_type, io_path);
+
+			values[COLUMN_BACKEND_TYPE] = backend_type_desc;
+			values[COLUMN_IO_PATH] = CStringGetTextDatum(GetIOPathDesc(io_path));
+			values[COLUMN_ALLOCS] = values[COLUMN_ALLOCS] + io_ops->allocs - resets->allocs;
+			values[COLUMN_EXTENDS] = values[COLUMN_EXTENDS] + io_ops->extends - resets->extends;
+			values[COLUMN_FSYNCS] = values[COLUMN_FSYNCS] + io_ops->fsyncs - resets->fsyncs;
+			values[COLUMN_WRITES] = values[COLUMN_WRITES] + io_ops->writes - resets->writes;
+			values[COLUMN_RESET_TIME] = reset_time;
+			io_ops++;
+			resets++;
+		}
+	}
+
+	for (i = 0; i < NROWS; i++)
+	{
+		Datum	   *values = all_values[i];
+		bool	   *nulls = all_nulls[i];
+
+		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
+	}
+
+	/* clean up and return the tuplestore */
+	tuplestore_donestoring(tupstore);
+
+	return (Datum) 0;
+}
+
 /*
  * Returns statistics of WAL activity
  */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index e934361dc3..fda916f911 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5646,6 +5646,15 @@
   proname => 'pg_stat_get_buf_alloc', provolatile => 's', proparallel => 'r',
   prorettype => 'int8', proargtypes => '', prosrc => 'pg_stat_get_buf_alloc' },
 
+{ oid => '8459', descr => 'statistics: counts of all IO operations done to all IO paths by each type of backend',
+  proname => 'pg_stat_get_buffers', provolatile => 's', proisstrict => 'f',
+  prorows => '52', proretset => 't',
+  proparallel => 'r', prorettype => 'record', proargtypes => '',
+  proallargtypes => '{text,text,int8,int8,int8,int8,timestamptz}',
+  proargmodes => '{o,o,o,o,o,o,o}',
+  proargnames => '{backend_type,io_path,alloc,extend,fsync,write,stats_reset}',
+  prosrc => 'pg_stat_get_buffers' },
+
 { oid => '1136', descr => 'statistics: information about WAL activity',
   proname => 'pg_stat_get_wal', proisstrict => 'f', provolatile => 's',
   proparallel => 'r', prorettype => 'record', proargtypes => '',
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 8c291f1f0d..303cddb4c0 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -1179,6 +1179,7 @@ extern void pgstat_sum_io_path_ops(PgStatIOOps *dest, IOOps *src);
  * generate the pgstat* views.
  * ----------
  */
+extern PgStat_BackendIOPathOps *pgstat_fetch_exited_backend_buffers(void);
 extern PgStat_StatDBEntry *pgstat_fetch_stat_dbentry(Oid dbid);
 extern PgStat_StatTabEntry *pgstat_fetch_stat_tabentry(Oid relid);
 extern PgStat_StatFuncEntry *pgstat_fetch_stat_funcentry(Oid funcid);
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 9c997cace8..dd983fc949 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -321,6 +321,7 @@ extern PGDLLIMPORT int pgstat_track_activity_query_size;
  * ----------
  */
 extern PGDLLIMPORT PgBackendStatus *MyBEEntry;
+extern PGDLLIMPORT PgBackendStatus *BackendStatusArray;
 
 
 /* ----------
@@ -336,6 +337,10 @@ extern void CreateSharedBackendStatus(void);
  * ----------
  */
 
+/* Utility functions */
+extern const char *GetIOPathDesc(IOPath io_path);
+
+
 /* Initialization functions */
 extern void pgstat_beinit(void);
 extern void pgstat_bestart(void);
diff --git a/src/test/regress/expected/rules.out b/src/test/regress/expected/rules.out
index 2fa00a3c29..5e5a0324ee 100644
--- a/src/test/regress/expected/rules.out
+++ b/src/test/regress/expected/rules.out
@@ -1828,6 +1828,14 @@ pg_stat_bgwriter| SELECT pg_stat_get_bgwriter_timed_checkpoints() AS checkpoints
     pg_stat_get_buf_fsync_backend() AS buffers_backend_fsync,
     pg_stat_get_buf_alloc() AS buffers_alloc,
     pg_stat_get_bgwriter_stat_reset_time() AS stats_reset;
+pg_stat_buffers| SELECT b.backend_type,
+    b.io_path,
+    b.alloc,
+    b.extend,
+    b.fsync,
+    b.write,
+    b.stats_reset
+   FROM pg_stat_get_buffers() b(backend_type, io_path, alloc, extend, fsync, write, stats_reset);
 pg_stat_database| SELECT d.oid AS datid,
     d.datname,
         CASE
-- 
2.32.0

