From e755fdccf16cb4fcd3036e1209291750ffecd261 Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Fri, 3 May 2024 15:54:58 -0700
Subject: [PATCH v5 1/2] pgstat: report in earlier with STATE_STARTING

Add pgstat_bestart_pre_auth(), which reports a 'starting' state while
waiting for backend initialization and client authentication to
complete. Since we hold a transaction open for a good amount of that,
and some authentication methods call out to external systems, having a
pg_stat_activity entry helps DBAs debug when things go badly wrong.
---
 doc/src/sgml/monitoring.sgml                |  6 ++
 src/backend/utils/activity/backend_status.c | 37 +++++++++-
 src/backend/utils/adt/pgstatfuncs.c         |  3 +
 src/backend/utils/init/postinit.c           | 20 ++++-
 src/include/utils/backend_status.h          |  2 +
 src/test/authentication/Makefile            |  4 +
 src/test/authentication/meson.build         |  4 +
 src/test/authentication/t/007_pre_auth.pl   | 81 +++++++++++++++++++++
 8 files changed, 150 insertions(+), 7 deletions(-)
 create mode 100644 src/test/authentication/t/007_pre_auth.pl

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 331315f8d3..81a4a95152 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -899,6 +899,12 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
        Current overall state of this backend.
        Possible values are:
        <itemizedlist>
+        <listitem>
+         <para>
+          <literal>starting</literal>: The backend is in initial startup. Client
+          authentication is performed during this phase.
+         </para>
+        </listitem>
         <listitem>
         <para>
           <literal>active</literal>: The backend is executing a query.
diff --git a/src/backend/utils/activity/backend_status.c b/src/backend/utils/activity/backend_status.c
index bdb3a296ca..d71d7c1b4f 100644
--- a/src/backend/utils/activity/backend_status.c
+++ b/src/backend/utils/activity/backend_status.c
@@ -69,6 +69,7 @@ static int	localNumBackends = 0;
 static MemoryContext backendStatusSnapContext;
 
 
+static void pgstat_bestart_internal(bool pre_auth);
 static void pgstat_beshutdown_hook(int code, Datum arg);
 static void pgstat_read_current_status(void);
 static void pgstat_setup_backend_status_context(void);
@@ -269,6 +270,34 @@ pgstat_beinit(void)
  */
 void
 pgstat_bestart(void)
+{
+	pgstat_bestart_internal(false);
+}
+
+
+/* ----------
+ * pgstat_bestart_pre_auth() -
+ *
+ *	Like pgstat_beinit(), above, but it's designed to be called before
+ *	authentication has been performed (so we have no user or database IDs).
+ *	Called from InitPostgres.
+ *----------
+ */
+void
+pgstat_bestart_pre_auth(void)
+{
+	pgstat_bestart_internal(true);
+}
+
+
+/* ----------
+ * pgstat_bestart_internal() -
+ *
+ *	Implementation of both flavors of pgstat_bestart().
+ *----------
+ */
+static void
+pgstat_bestart_internal(bool pre_auth)
 {
 	volatile PgBackendStatus *vbeentry = MyBEEntry;
 	PgBackendStatus lbeentry;
@@ -318,9 +347,9 @@ pgstat_bestart(void)
 	lbeentry.st_databaseid = MyDatabaseId;
 
 	/* We have userid for client-backends, wal-sender and bgworker processes */
-	if (lbeentry.st_backendType == B_BACKEND
-		|| lbeentry.st_backendType == B_WAL_SENDER
-		|| lbeentry.st_backendType == B_BG_WORKER)
+	if (!pre_auth && (lbeentry.st_backendType == B_BACKEND
+					  || lbeentry.st_backendType == B_WAL_SENDER
+					  || lbeentry.st_backendType == B_BG_WORKER))
 		lbeentry.st_userid = GetSessionUserId();
 	else
 		lbeentry.st_userid = InvalidOid;
@@ -375,7 +404,7 @@ pgstat_bestart(void)
 	lbeentry.st_gss = false;
 #endif
 
-	lbeentry.st_state = STATE_UNDEFINED;
+	lbeentry.st_state = pre_auth ? STATE_STARTING : STATE_UNDEFINED;
 	lbeentry.st_progress_command = PROGRESS_COMMAND_INVALID;
 	lbeentry.st_progress_command_target = InvalidOid;
 	lbeentry.st_query_id = UINT64CONST(0);
diff --git a/src/backend/utils/adt/pgstatfuncs.c b/src/backend/utils/adt/pgstatfuncs.c
index f7b50e0b5a..c461bbd400 100644
--- a/src/backend/utils/adt/pgstatfuncs.c
+++ b/src/backend/utils/adt/pgstatfuncs.c
@@ -366,6 +366,9 @@ pg_stat_get_activity(PG_FUNCTION_ARGS)
 
 			switch (beentry->st_state)
 			{
+				case STATE_STARTING:
+					values[4] = CStringGetTextDatum("starting");
+					break;
 				case STATE_IDLE:
 					values[4] = CStringGetTextDatum("idle");
 					break;
diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c
index a024b1151d..adaa83e745 100644
--- a/src/backend/utils/init/postinit.c
+++ b/src/backend/utils/init/postinit.c
@@ -58,6 +58,7 @@
 #include "utils/builtins.h"
 #include "utils/fmgroids.h"
 #include "utils/guc_hooks.h"
+#include "utils/injection_point.h"
 #include "utils/memutils.h"
 #include "utils/pg_locale.h"
 #include "utils/portal.h"
@@ -714,6 +715,21 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	 */
 	InitProcessPhase2();
 
+	/* Initialize status reporting */
+	pgstat_beinit();
+
+	/*
+	 * This is a convenient time to sketch in a partial pgstat entry. That
+	 * way, if LWLocks or third-party authentication should happen to hang,
+	 * the DBA will still be able to see what's going on. (A later call to
+	 * pgstat_bestart() will fill in the rest of the status.)
+	 */
+	if (!bootstrap)
+	{
+		pgstat_bestart_pre_auth();
+		INJECTION_POINT("init-pre-auth");
+	}
+
 	/*
 	 * Initialize my entry in the shared-invalidation manager's array of
 	 * per-backend data.
@@ -782,9 +798,6 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	/* Initialize portal manager */
 	EnablePortalManager();
 
-	/* Initialize status reporting */
-	pgstat_beinit();
-
 	/*
 	 * Load relcache entries for the shared system catalogs.  This must create
 	 * at least entries for pg_database and catalogs used for authentication.
@@ -882,6 +895,7 @@ InitPostgres(const char *in_dbname, Oid dboid,
 	{
 		/* normal multiuser case */
 		Assert(MyProcPort != NULL);
+
 		PerformAuthentication(MyProcPort);
 		InitializeSessionUserId(username, useroid, false);
 		/* ensure that auth_method is actually valid, aka authn_id is not NULL */
diff --git a/src/include/utils/backend_status.h b/src/include/utils/backend_status.h
index 97874300c3..8a6d573ce3 100644
--- a/src/include/utils/backend_status.h
+++ b/src/include/utils/backend_status.h
@@ -24,6 +24,7 @@
 typedef enum BackendState
 {
 	STATE_UNDEFINED,
+	STATE_STARTING,
 	STATE_IDLE,
 	STATE_RUNNING,
 	STATE_IDLEINTRANSACTION,
@@ -309,6 +310,7 @@ extern void BackendStatusShmemInit(void);
 
 /* Initialization functions */
 extern void pgstat_beinit(void);
+extern void pgstat_bestart_pre_auth(void);
 extern void pgstat_bestart(void);
 
 extern void pgstat_clear_backend_activity_snapshot(void);
diff --git a/src/test/authentication/Makefile b/src/test/authentication/Makefile
index da0b71873a..d05b15de7c 100644
--- a/src/test/authentication/Makefile
+++ b/src/test/authentication/Makefile
@@ -13,6 +13,10 @@ subdir = src/test/authentication
 top_builddir = ../../..
 include $(top_builddir)/src/Makefile.global
 
+EXTRA_INSTALL = src/test/modules/injection_points
+
+export enable_injection_points
+
 check:
 	$(prove_check)
 
diff --git a/src/test/authentication/meson.build b/src/test/authentication/meson.build
index 8f5688dcc1..fe4ca51873 100644
--- a/src/test/authentication/meson.build
+++ b/src/test/authentication/meson.build
@@ -5,6 +5,9 @@ tests += {
   'sd': meson.current_source_dir(),
   'bd': meson.current_build_dir(),
   'tap': {
+    'env': {
+       'enable_injection_points': get_option('injection_points') ? 'yes' : 'no',
+    },
     'tests': [
       't/001_password.pl',
       't/002_saslprep.pl',
@@ -12,6 +15,7 @@ tests += {
       't/004_file_inclusion.pl',
       't/005_sspi.pl',
       't/006_login_trigger.pl',
+      't/007_pre_auth.pl',
     ],
   },
 }
diff --git a/src/test/authentication/t/007_pre_auth.pl b/src/test/authentication/t/007_pre_auth.pl
new file mode 100644
index 0000000000..dd554462d3
--- /dev/null
+++ b/src/test/authentication/t/007_pre_auth.pl
@@ -0,0 +1,81 @@
+
+# Copyright (c) 2021-2024, PostgreSQL Global Development Group
+
+# Tests for connection behavior prior to authentication.
+
+use strict;
+use warnings FATAL => 'all';
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+use Test::More;
+
+if ($ENV{enable_injection_points} ne 'yes')
+{
+	plan skip_all => 'Injection points not supported by this build';
+}
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init;
+$node->append_conf(
+	'postgresql.conf', q[
+log_connections = on
+]);
+
+$node->start;
+
+# Check if the extension injection_points is available, as it may be
+# possible that this script is run with installcheck, where the module
+# would not be installed by default.
+if (!$node->check_extension('injection_points'))
+{
+	plan skip_all => 'Extension injection_points not installed';
+}
+
+$node->safe_psql('postgres', 'CREATE EXTENSION injection_points');
+
+# Connect to the server and inject a waitpoint.
+my $psql = $node->background_psql('postgres');
+$psql->query_safe("SELECT injection_points_attach('init-pre-auth', 'wait')");
+
+# From this point on, all new connections will hang during startup, just before
+# authentication. Use the $psql connection handle for server interaction.
+my $conn = $node->background_psql('postgres', wait => 0);
+
+# Wait for the connection to show up.
+my $pid;
+while (1)
+{
+	$pid = $psql->query(
+		"SELECT pid FROM pg_stat_activity WHERE state = 'starting';");
+	last if $pid ne "";
+
+	usleep(500_000);
+}
+
+note "backend $pid is authenticating";
+ok(1, 'authenticating connections are recorded in pg_stat_activity');
+
+# Detach the waitpoint and wait for the connection to complete.
+$psql->query_safe("SELECT injection_points_wakeup('init-pre-auth');");
+$conn->wait_connect();
+
+# Make sure the pgstat entry is updated eventually.
+while (1)
+{
+	my $state =
+	  $psql->query("SELECT state FROM pg_stat_activity WHERE pid = $pid;");
+	last if $state eq "idle";
+
+	note "state for backend $pid is '$state'; waiting for 'idle'...";
+	usleep(500_000);
+}
+
+ok(1, 'authenticated connections reach idle state in pg_stat_activity');
+
+$psql->query_safe("SELECT injection_points_detach('init-pre-auth');");
+$psql->quit();
+$conn->quit();
+
+done_testing();
-- 
2.34.1

