On Fri, 2021-09-03 at 17:04 -0700, Andres Freund wrote:
> Here's how I think that would look like. While writing up this draft, I found
> two more issues:
> 
> - On windows / 32 bit systems, the session time would overflow if idle for
>   longer than ~4300s. long is only 32 bit. Easy to fix obviously.
> - Right now walsenders, including database connected walsenders, are not
>   reported in connection stats. That doesn't seem quite right to me.
> 
> In the patch I made the message for connecting an explicitly reported message,
> that seems cleaner, because it then happens at a clearly defined point. I
> didn't do the same for disconnecting, but perhaps that would be better? Then
> we could get rid of the whole pgStatSessionEndCause variable.

I have gone over your patch and made the following changes:

- cache the last report time in a static variable pgLastSessionReportTime
- add a comment to explain why we only track normal backends
- pgindent
- an attempt at a commit message

Yours,
Laurenz Albe
From 1fdfac528ec4e75d91b9cf3868eda66a72a41c8f Mon Sep 17 00:00:00 2001
From: Laurenz Albe <laurenz.a...@cybertec.at>
Date: Wed, 8 Sep 2021 06:06:46 +0200
Subject: [PATCH] Fix performance regression from session statistics

Session statistics, as introduced by 960869da08, had several
shortcomings:

- an additional GetCurrentTimestamp() call that also impaired
  the accuracy of the data collected

  This can be avoided by passing the current timestamp we already
  have in pgstat_report_stat().

- an additional statistics UDP packet sent every 500ms

  This is solved by adding the new statistics to PgStat_MsgTabstat.
  This is conceptually ugly, because session statistics are not
  table statistics.  But the struct already contains data unrelated
  to tables, so there is not much damage done.

  Connection and disconnection are reported in separate messages,
  which reduces the total overhead to two messages per session.

The time of the last session statistics report is now cached
in the static variable pgLastSessionReportTime.

Discussion: https://postgr.es/m/20210801205501.nyxzxoelqoo4x2qc%40alap3.anarazel.de
---
 src/backend/postmaster/pgstat.c             | 177 +++++++++++++-------
 src/backend/tcop/postgres.c                 |   2 +
 src/backend/utils/activity/backend_status.c |   4 +-
 src/include/pgstat.h                        |  34 ++--
 src/tools/pgindent/typedefs.list            |   3 +-
 5 files changed, 141 insertions(+), 79 deletions(-)

diff --git a/src/backend/postmaster/pgstat.c b/src/backend/postmaster/pgstat.c
index 3450a10129..7aaf928315 100644
--- a/src/backend/postmaster/pgstat.c
+++ b/src/backend/postmaster/pgstat.c
@@ -246,6 +246,7 @@ static int	pgStatXactCommit = 0;
 static int	pgStatXactRollback = 0;
 PgStat_Counter pgStatBlockReadTime = 0;
 PgStat_Counter pgStatBlockWriteTime = 0;
+static PgStat_Counter pgLastSessionReportTime = 0;
 PgStat_Counter pgStatActiveTime = 0;
 PgStat_Counter pgStatTransactionIdleTime = 0;
 SessionEndType pgStatSessionEndCause = DISCONNECT_NORMAL;
@@ -330,11 +331,11 @@ static bool pgstat_db_requested(Oid databaseid);
 static PgStat_StatReplSlotEntry *pgstat_get_replslot_entry(NameData name, bool create_it);
 static void pgstat_reset_replslot(PgStat_StatReplSlotEntry *slotstats, TimestampTz ts);
 
-static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg);
+static void pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now);
 static void pgstat_send_funcstats(void);
 static void pgstat_send_slru(void);
 static HTAB *pgstat_collect_oids(Oid catalogid, AttrNumber anum_oid);
-static void pgstat_send_connstats(bool disconnect, TimestampTz last_report);
+static void pgstat_report_disconnect(Oid dboid);
 
 static PgStat_TableStatus *get_tabstat_entry(Oid rel_id, bool isshared);
 
@@ -366,7 +367,8 @@ static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
 static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
 static void pgstat_recv_deadlock(PgStat_MsgDeadlock *msg, int len);
 static void pgstat_recv_checksum_failure(PgStat_MsgChecksumFailure *msg, int len);
-static void pgstat_recv_connstat(PgStat_MsgConn *msg, int len);
+static void pgstat_recv_connect(PgStat_MsgConnect *msg, int len);
+static void pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len);
 static void pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len);
 static void pgstat_recv_tempfile(PgStat_MsgTempFile *msg, int len);
 
@@ -890,12 +892,11 @@ pgstat_report_stat(bool disconnect)
 		!TimestampDifferenceExceeds(last_report, now, PGSTAT_STAT_INTERVAL))
 		return;
 
-	/* for backends, send connection statistics */
-	if (MyBackendType == B_BACKEND)
-		pgstat_send_connstats(disconnect, last_report);
-
 	last_report = now;
 
+	if (disconnect)
+		pgstat_report_disconnect(MyDatabaseId);
+
 	/*
 	 * Destroy pgStatTabHash before we start invalidating PgStat_TableEntry
 	 * entries it points to.  (Should we fail partway through the loop below,
@@ -947,7 +948,7 @@ pgstat_report_stat(bool disconnect)
 				   sizeof(PgStat_TableCounts));
 			if (++this_msg->m_nentries >= PGSTAT_NUM_TABENTRIES)
 			{
-				pgstat_send_tabstat(this_msg);
+				pgstat_send_tabstat(this_msg, now);
 				this_msg->m_nentries = 0;
 			}
 		}
@@ -962,10 +963,10 @@ pgstat_report_stat(bool disconnect)
 	 * gets counted, even if there are no table stats to send.
 	 */
 	if (regular_msg.m_nentries > 0 ||
-		pgStatXactCommit > 0 || pgStatXactRollback > 0)
-		pgstat_send_tabstat(&regular_msg);
+		pgStatXactCommit > 0 || pgStatXactRollback > 0 || disconnect)
+		pgstat_send_tabstat(&regular_msg, now);
 	if (shared_msg.m_nentries > 0)
-		pgstat_send_tabstat(&shared_msg);
+		pgstat_send_tabstat(&shared_msg, now);
 
 	/* Now, send function statistics */
 	pgstat_send_funcstats();
@@ -981,7 +982,7 @@ pgstat_report_stat(bool disconnect)
  * Subroutine for pgstat_report_stat: finish and send a tabstat message
  */
 static void
-pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
+pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg, TimestampTz now)
 {
 	int			n;
 	int			len;
@@ -1000,10 +1001,42 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
 		tsmsg->m_xact_rollback = pgStatXactRollback;
 		tsmsg->m_block_read_time = pgStatBlockReadTime;
 		tsmsg->m_block_write_time = pgStatBlockWriteTime;
+
+		/*
+		 * We report session statistics only for normal backend processes.
+		 * Parallel workers run in parallel, so they don't contribute to
+		 * session times, even though they use CPU time. Walsender processes
+		 * could be considered here, but they have different session
+		 * characteristics from normal backends (for example, they are always
+		 * "active"), so they would skew session statistics.
+		 */
+		if (MyBackendType == B_BACKEND)
+		{
+			long		secs;
+			int			usecs;
+
+			/*
+			 * pgLastSessionReportTime is initialized to MyStartTimestamp by
+			 * pgstat_report_connect().
+			 */
+			TimestampDifference(pgLastSessionReportTime, now, &secs, &usecs);
+			pgLastSessionReportTime = now;
+			tsmsg->m_session_time = (PgStat_Counter) secs * 1000000 + usecs;
+			tsmsg->m_active_time = pgStatActiveTime;
+			tsmsg->m_idle_in_xact_time = pgStatTransactionIdleTime;
+		}
+		else
+		{
+			tsmsg->m_session_time = 0;
+			tsmsg->m_active_time = 0;
+			tsmsg->m_idle_in_xact_time = 0;
+		}
 		pgStatXactCommit = 0;
 		pgStatXactRollback = 0;
 		pgStatBlockReadTime = 0;
 		pgStatBlockWriteTime = 0;
+		pgStatActiveTime = 0;
+		pgStatTransactionIdleTime = 0;
 	}
 	else
 	{
@@ -1011,6 +1044,9 @@ pgstat_send_tabstat(PgStat_MsgTabstat *tsmsg)
 		tsmsg->m_xact_rollback = 0;
 		tsmsg->m_block_read_time = 0;
 		tsmsg->m_block_write_time = 0;
+		tsmsg->m_session_time = 0;
+		tsmsg->m_active_time = 0;
+		tsmsg->m_idle_in_xact_time = 0;
 	}
 
 	n = tsmsg->m_nentries;
@@ -1378,49 +1414,6 @@ pgstat_drop_relation(Oid relid)
 }
 #endif							/* NOT_USED */
 
-
-/* ----------
- * pgstat_send_connstats() -
- *
- *	Tell the collector about session statistics.
- *	The parameter "disconnect" will be true when the backend exits.
- *	"last_report" is the last time we were called (0 if never).
- * ----------
- */
-static void
-pgstat_send_connstats(bool disconnect, TimestampTz last_report)
-{
-	PgStat_MsgConn msg;
-	long		secs;
-	int			usecs;
-
-	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
-		return;
-
-	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECTION);
-	msg.m_databaseid = MyDatabaseId;
-
-	/* session time since the last report */
-	TimestampDifference(((last_report == 0) ? MyStartTimestamp : last_report),
-						GetCurrentTimestamp(),
-						&secs, &usecs);
-	msg.m_session_time = secs * 1000000 + usecs;
-
-	msg.m_disconnect = disconnect ? pgStatSessionEndCause : DISCONNECT_NOT_YET;
-
-	msg.m_active_time = pgStatActiveTime;
-	pgStatActiveTime = 0;
-
-	msg.m_idle_in_xact_time = pgStatTransactionIdleTime;
-	pgStatTransactionIdleTime = 0;
-
-	/* report a new session only the first time */
-	msg.m_count = (last_report == 0) ? 1 : 0;
-
-	pgstat_send(&msg, sizeof(PgStat_MsgConn));
-}
-
-
 /* ----------
  * pgstat_reset_counters() -
  *
@@ -1759,6 +1752,41 @@ pgstat_report_tempfile(size_t filesize)
 	pgstat_send(&msg, sizeof(msg));
 }
 
+/* --------
+ * pgstat_report_connect() -
+ *
+ *	Tell the collector about a new connection.
+ * --------
+ */
+void
+pgstat_report_connect(Oid dboid)
+{
+	PgStat_MsgConnect msg;
+
+	pgLastSessionReportTime = MyStartTimestamp;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_CONNECT);
+	msg.m_databaseid = MyDatabaseId;
+	pgstat_send(&msg, sizeof(PgStat_MsgConnect));
+}
+
+/* --------
+ * pgstat_report_disconnect() -
+ *
+ *	Tell the collector about a disconnect.
+ * --------
+ */
+static void
+pgstat_report_disconnect(Oid dboid)
+{
+	PgStat_MsgDisconnect msg;
+
+	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_DISCONNECT);
+	msg.m_databaseid = MyDatabaseId;
+	msg.m_cause = pgStatSessionEndCause;
+	pgstat_send(&msg, sizeof(PgStat_MsgDisconnect));
+}
+
 /* ----------
  * pgstat_report_replslot() -
  *
@@ -3465,8 +3493,12 @@ PgstatCollectorMain(int argc, char *argv[])
 					pgstat_recv_replslot(&msg.msg_replslot, len);
 					break;
 
-				case PGSTAT_MTYPE_CONNECTION:
-					pgstat_recv_connstat(&msg.msg_conn, len);
+				case PGSTAT_MTYPE_CONNECT:
+					pgstat_recv_connect(&msg.msg_connect, len);
+					break;
+
+				case PGSTAT_MTYPE_DISCONNECT:
+					pgstat_recv_disconnect(&msg.msg_disconnect, len);
 					break;
 
 				default:
@@ -4904,6 +4936,10 @@ pgstat_recv_tabstat(PgStat_MsgTabstat *msg, int len)
 	dbentry->n_block_read_time += msg->m_block_read_time;
 	dbentry->n_block_write_time += msg->m_block_write_time;
 
+	dbentry->total_session_time += msg->m_session_time;
+	dbentry->total_active_time += msg->m_active_time;
+	dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
+
 	/*
 	 * Process all table entries in the message.
 	 */
@@ -5568,23 +5604,34 @@ pgstat_recv_replslot(PgStat_MsgReplSlot *msg, int len)
 }
 
 /* ----------
- * pgstat_recv_connstat() -
+ * pgstat_recv_connect() -
  *
- *  Process connection information.
+ *	Process a CONNECT message.
  * ----------
  */
 static void
-pgstat_recv_connstat(PgStat_MsgConn *msg, int len)
+pgstat_recv_connect(PgStat_MsgConnect *msg, int len)
 {
 	PgStat_StatDBEntry *dbentry;
 
 	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+	dbentry->n_sessions++;
+}
 
-	dbentry->n_sessions += msg->m_count;
-	dbentry->total_session_time += msg->m_session_time;
-	dbentry->total_active_time += msg->m_active_time;
-	dbentry->total_idle_in_xact_time += msg->m_idle_in_xact_time;
-	switch (msg->m_disconnect)
+/* ----------
+ * pgstat_recv_disconnect() -
+ *
+ *	Process a DISCONNECT message.
+ * ----------
+ */
+static void
+pgstat_recv_disconnect(PgStat_MsgDisconnect *msg, int len)
+{
+	PgStat_StatDBEntry *dbentry;
+
+	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+
+	switch (msg->m_cause)
 	{
 		case DISCONNECT_NOT_YET:
 		case DISCONNECT_NORMAL:
diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c
index 58b5960e27..e7d34804b0 100644
--- a/src/backend/tcop/postgres.c
+++ b/src/backend/tcop/postgres.c
@@ -4110,6 +4110,8 @@ PostgresMain(int argc, char *argv[],
 	if (IsUnderPostmaster && Log_disconnections)
 		on_proc_exit(log_disconnections, 0);
 
+	pgstat_report_connect(MyDatabaseId);
+
 	/* Perform initialization specific to a WAL sender process. */
 	if (am_walsender)
 		InitWalSender();
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index e19c4506ef..7229598822 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -590,9 +590,9 @@ pgstat_report_activity(BackendState state, const char *cmd_str)
 
 		if (beentry->st_state == STATE_RUNNING ||
 			beentry->st_state == STATE_FASTPATH)
-			pgstat_count_conn_active_time(secs * 1000000 + usecs);
+			pgstat_count_conn_active_time((PgStat_Counter) secs * 1000000 + usecs);
 		else
-			pgstat_count_conn_txn_idle_time(secs * 1000000 + usecs);
+			pgstat_count_conn_txn_idle_time((PgStat_Counter) secs * 1000000 + usecs);
 	}
 
 	/*
diff --git a/src/include/pgstat.h b/src/include/pgstat.h
index 509849c7ff..a7b386821f 100644
--- a/src/include/pgstat.h
+++ b/src/include/pgstat.h
@@ -81,7 +81,8 @@ typedef enum StatMsgType
 	PGSTAT_MTYPE_DEADLOCK,
 	PGSTAT_MTYPE_CHECKSUMFAILURE,
 	PGSTAT_MTYPE_REPLSLOT,
-	PGSTAT_MTYPE_CONNECTION,
+	PGSTAT_MTYPE_CONNECT,
+	PGSTAT_MTYPE_DISCONNECT,
 } StatMsgType;
 
 /* ----------
@@ -279,7 +280,7 @@ typedef struct PgStat_TableEntry
  * ----------
  */
 #define PGSTAT_NUM_TABENTRIES  \
-	((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 2 * sizeof(PgStat_Counter))	\
+	((PGSTAT_MSG_PAYLOAD - sizeof(Oid) - 3 * sizeof(int) - 5 * sizeof(PgStat_Counter)) \
 	 / sizeof(PgStat_TableEntry))
 
 typedef struct PgStat_MsgTabstat
@@ -291,6 +292,9 @@ typedef struct PgStat_MsgTabstat
 	int			m_xact_rollback;
 	PgStat_Counter m_block_read_time;	/* times in microseconds */
 	PgStat_Counter m_block_write_time;
+	PgStat_Counter m_session_time;
+	PgStat_Counter m_active_time;
+	PgStat_Counter m_idle_in_xact_time;
 	PgStat_TableEntry m_entry[PGSTAT_NUM_TABENTRIES];
 } PgStat_MsgTabstat;
 
@@ -653,20 +657,26 @@ typedef struct PgStat_MsgChecksumFailure
 } PgStat_MsgChecksumFailure;
 
 /* ----------
- * PgStat_MsgConn			Sent by the backend to update connection statistics.
+ * PgStat_MsgConnect			Sent by the backend upon connection
+ *								establishment
  * ----------
  */
-typedef struct PgStat_MsgConn
+typedef struct PgStat_MsgConnect
 {
 	PgStat_MsgHdr m_hdr;
 	Oid			m_databaseid;
-	PgStat_Counter m_count;
-	PgStat_Counter m_session_time;
-	PgStat_Counter m_active_time;
-	PgStat_Counter m_idle_in_xact_time;
-	SessionEndType m_disconnect;
-} PgStat_MsgConn;
+} PgStat_MsgConnect;
 
+/* ----------
+ * PgStat_MsgDisconnect			Sent by the backend when disconnecting
+ * ----------
+ */
+typedef struct PgStat_MsgDisconnect
+{
+	PgStat_MsgHdr m_hdr;
+	Oid			m_databaseid;
+	SessionEndType m_cause;
+} PgStat_MsgDisconnect;
 
 /* ----------
  * PgStat_Msg					Union over all possible messages.
@@ -700,7 +710,8 @@ typedef union PgStat_Msg
 	PgStat_MsgTempFile msg_tempfile;
 	PgStat_MsgChecksumFailure msg_checksumfailure;
 	PgStat_MsgReplSlot msg_replslot;
-	PgStat_MsgConn msg_conn;
+	PgStat_MsgConnect msg_connect;
+	PgStat_MsgDisconnect msg_disconnect;
 } PgStat_Msg;
 
 
@@ -1010,6 +1021,7 @@ extern void pgstat_reset_single_counter(Oid objectid, PgStat_Single_Reset_Type t
 extern void pgstat_reset_slru_counter(const char *);
 extern void pgstat_reset_replslot_counter(const char *name);
 
+extern void pgstat_report_connect(Oid dboid);
 extern void pgstat_report_autovac(Oid dboid);
 extern void pgstat_report_vacuum(Oid tableoid, bool shared,
 								 PgStat_Counter livetuples, PgStat_Counter deadtuples);
diff --git a/src/tools/pgindent/typedefs.list b/src/tools/pgindent/typedefs.list
index 423780652f..402a6617a9 100644
--- a/src/tools/pgindent/typedefs.list
+++ b/src/tools/pgindent/typedefs.list
@@ -1923,8 +1923,9 @@ PgStat_MsgArchiver
 PgStat_MsgAutovacStart
 PgStat_MsgBgWriter
 PgStat_MsgChecksumFailure
-PgStat_MsgConn
+PgStat_MsgConnect
 PgStat_MsgDeadlock
+PgStat_MsgDisconnect
 PgStat_MsgDropdb
 PgStat_MsgDummy
 PgStat_MsgFuncpurge
-- 
2.31.1

Reply via email to