From d14b97cb7737e32d9d809794a6671b47788879c0 Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Fri, 3 May 2024 15:58:23 -0700
Subject: [PATCH v4 4/4] Report external auth calls as wait events

Introduce a new "Auth" wait class for various external authentication
systems, to make it obvious what's going wrong if one of those systems
hangs. Each new wait event is unique in order to more easily pinpoint
problematic locations in the code.

Discussion: https://postgr.es/m/CAOYmi%2B%3D60deN20WDyCoHCiecgivJxr%3D98s7s7-C8SkXwrCfHXg%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml                  |  8 +++
 src/backend/libpq/auth.c                      | 54 +++++++++++++++----
 src/backend/utils/activity/wait_event.c       | 11 ++++
 .../utils/activity/wait_event_names.txt       | 24 +++++++++
 src/include/utils/wait_event.h                |  1 +
 src/test/regress/expected/sysviews.out        |  3 +-
 6 files changed, 90 insertions(+), 11 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 81a4a95152..a148e63711 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1045,6 +1045,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
        see <xref linkend="wait-event-activity-table"/>.
       </entry>
      </row>
+     <row>
+      <entry><literal>Auth</literal></entry>
+      <entry>The server process is waiting for an external system to
+       authenticate and/or authorize the client connection.
+       <literal>wait_event</literal> will identify the specific wait point;
+       see <xref linkend="wait-event-auth-table"/>.
+      </entry>
+     </row>
      <row>
       <entry><literal>BufferPin</literal></entry>
       <entry>The server process is waiting for exclusive access to
diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c
index 47e8c91606..bbcde591ae 100644
--- a/src/backend/libpq/auth.c
+++ b/src/backend/libpq/auth.c
@@ -38,6 +38,7 @@
 #include "replication/walsender.h"
 #include "storage/ipc.h"
 #include "utils/memutils.h"
+#include "utils/wait_event.h"
 
 /*----------------------------------------------------------------
  * Global authentication functions
@@ -1000,6 +1001,7 @@ pg_GSS_recvauth(Port *port)
 		elog(DEBUG4, "processing received GSS token of length %u",
 			 (unsigned int) gbuf.length);
 
+		pgstat_report_wait_start(WAIT_EVENT_GSSAPI_ACCEPT_SEC_CONTEXT);
 		maj_stat = gss_accept_sec_context(&min_stat,
 										  &port->gss->ctx,
 										  port->gss->cred,
@@ -1011,6 +1013,7 @@ pg_GSS_recvauth(Port *port)
 										  &gflags,
 										  NULL,
 										  pg_gss_accept_delegation ? &delegated_creds : NULL);
+		pgstat_report_wait_end();
 
 		/* gbuf no longer used */
 		pfree(buf.data);
@@ -1222,6 +1225,7 @@ pg_SSPI_recvauth(Port *port)
 	/*
 	 * Acquire a handle to the server credentials.
 	 */
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_ACQUIRE_CREDENTIALS_HANDLE);
 	r = AcquireCredentialsHandle(NULL,
 								 "negotiate",
 								 SECPKG_CRED_INBOUND,
@@ -1231,6 +1235,8 @@ pg_SSPI_recvauth(Port *port)
 								 NULL,
 								 &sspicred,
 								 &expiry);
+	pgstat_report_wait_end();
+
 	if (r != SEC_E_OK)
 		pg_SSPI_error(ERROR, _("could not acquire SSPI credentials"), r);
 
@@ -1296,6 +1302,7 @@ pg_SSPI_recvauth(Port *port)
 		elog(DEBUG4, "processing received SSPI token of length %u",
 			 (unsigned int) buf.len);
 
+		pgstat_report_wait_start(WAIT_EVENT_SSPI_ACCEPT_SECURITY_CONTEXT);
 		r = AcceptSecurityContext(&sspicred,
 								  sspictx,
 								  &inbuf,
@@ -1305,6 +1312,7 @@ pg_SSPI_recvauth(Port *port)
 								  &outbuf,
 								  &contextattr,
 								  NULL);
+		pgstat_report_wait_end();
 
 		/* input buffer no longer used */
 		pfree(buf.data);
@@ -1402,19 +1410,25 @@ pg_SSPI_recvauth(Port *port)
 
 	CloseHandle(token);
 
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_LOOKUP_ACCOUNT_SID);
 	if (!LookupAccountSid(NULL, tokenuser->User.Sid, accountname, &accountnamesize,
 						  domainname, &domainnamesize, &accountnameuse))
 		ereport(ERROR,
 				(errmsg_internal("could not look up account SID: error code %lu",
 								 GetLastError())));
+	pgstat_report_wait_end();
 
 	free(tokenuser);
 
 	if (!port->hba->compat_realm)
 	{
-		int			status = pg_SSPI_make_upn(accountname, sizeof(accountname),
-											  domainname, sizeof(domainname),
-											  port->hba->upn_username);
+		int			status;
+
+		pgstat_report_wait_start(WAIT_EVENT_SSPI_MAKE_UPN);
+		status = pg_SSPI_make_upn(accountname, sizeof(accountname),
+								  domainname, sizeof(domainname),
+								  port->hba->upn_username);
+		pgstat_report_wait_end();
 
 		if (status != STATUS_OK)
 			/* Error already reported from pg_SSPI_make_upn */
@@ -2119,7 +2133,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
 		return STATUS_ERROR;
 	}
 
+	pgstat_report_wait_start(WAIT_EVENT_PAM_AUTHENTICATE);
 	retval = pam_authenticate(pamh, 0);
+	pgstat_report_wait_end();
 
 	if (retval != PAM_SUCCESS)
 	{
@@ -2132,7 +2148,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
 		return pam_no_password ? STATUS_EOF : STATUS_ERROR;
 	}
 
+	pgstat_report_wait_start(WAIT_EVENT_PAM_ACCT_MGMT);
 	retval = pam_acct_mgmt(pamh, 0);
+	pgstat_report_wait_end();
 
 	if (retval != PAM_SUCCESS)
 	{
@@ -2483,7 +2501,11 @@ CheckLDAPAuth(Port *port)
 	if (passwd == NULL)
 		return STATUS_EOF;		/* client wouldn't send password */
 
-	if (InitializeLDAPConnection(port, &ldap) == STATUS_ERROR)
+	pgstat_report_wait_start(WAIT_EVENT_LDAP_INITIALIZE);
+	r = InitializeLDAPConnection(port, &ldap);
+	pgstat_report_wait_end();
+
+	if (r == STATUS_ERROR)
 	{
 		/* Error message already sent */
 		pfree(passwd);
@@ -2530,9 +2552,12 @@ CheckLDAPAuth(Port *port)
 		 * Bind with a pre-defined username/password (if available) for
 		 * searching. If none is specified, this turns into an anonymous bind.
 		 */
+		pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND_FOR_SEARCH);
 		r = ldap_simple_bind_s(ldap,
 							   port->hba->ldapbinddn ? port->hba->ldapbinddn : "",
 							   port->hba->ldapbindpasswd ? ldap_password_hook(port->hba->ldapbindpasswd) : "");
+		pgstat_report_wait_end();
+
 		if (r != LDAP_SUCCESS)
 		{
 			ereport(LOG,
@@ -2555,6 +2580,8 @@ CheckLDAPAuth(Port *port)
 			filter = psprintf("(uid=%s)", port->user_name);
 
 		search_message = NULL;
+
+		pgstat_report_wait_start(WAIT_EVENT_LDAP_SEARCH);
 		r = ldap_search_s(ldap,
 						  port->hba->ldapbasedn,
 						  port->hba->ldapscope,
@@ -2562,6 +2589,7 @@ CheckLDAPAuth(Port *port)
 						  attributes,
 						  0,
 						  &search_message);
+		pgstat_report_wait_end();
 
 		if (r != LDAP_SUCCESS)
 		{
@@ -2630,7 +2658,9 @@ CheckLDAPAuth(Port *port)
 							port->user_name,
 							port->hba->ldapsuffix ? port->hba->ldapsuffix : "");
 
+	pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND);
 	r = ldap_simple_bind_s(ldap, fulluser, passwd);
+	pgstat_report_wait_end();
 
 	if (r != LDAP_SUCCESS)
 	{
@@ -2890,12 +2920,16 @@ CheckRADIUSAuth(Port *port)
 	identifiers = list_head(port->hba->radiusidentifiers);
 	foreach(server, port->hba->radiusservers)
 	{
-		int			ret = PerformRadiusTransaction(lfirst(server),
-												   lfirst(secrets),
-												   radiusports ? lfirst(radiusports) : NULL,
-												   identifiers ? lfirst(identifiers) : NULL,
-												   port->user_name,
-												   passwd);
+		int			ret;
+
+		pgstat_report_wait_start(WAIT_EVENT_RADIUS_TRANSACTION);
+		ret = PerformRadiusTransaction(lfirst(server),
+									   lfirst(secrets),
+									   radiusports ? lfirst(radiusports) : NULL,
+									   identifiers ? lfirst(identifiers) : NULL,
+									   port->user_name,
+									   passwd);
+		pgstat_report_wait_end();
 
 		/*------
 		 * STATUS_OK = Login OK
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index d930277140..a388999b1a 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -34,6 +34,7 @@ static const char *pgstat_get_wait_client(WaitEventClient w);
 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
+static const char *pgstat_get_wait_auth(WaitEventAuth w);
 
 
 static uint32 local_my_wait_event_info;
@@ -413,6 +414,9 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
 		case PG_WAIT_INJECTIONPOINT:
 			event_type = "InjectionPoint";
 			break;
+		case PG_WAIT_AUTH:
+			event_type = "Auth";
+			break;
 		default:
 			event_type = "???";
 			break;
@@ -495,6 +499,13 @@ pgstat_get_wait_event(uint32 wait_event_info)
 				event_name = pgstat_get_wait_io(w);
 				break;
 			}
+		case PG_WAIT_AUTH:
+			{
+				WaitEventAuth w = (WaitEventAuth) wait_event_info;
+
+				event_name = pgstat_get_wait_auth(w);
+				break;
+			}
 		default:
 			event_name = "unknown wait event";
 			break;
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index 8efb4044d6..e5c27a2d2c 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -162,6 +162,30 @@ XACT_GROUP_UPDATE	"Waiting for the group leader to update transaction status at
 
 ABI_compatibility:
 
+#
+# Wait Events - Auth
+#
+# Use this category when a process is waiting for a third party to
+# authenticate/authorize the user.
+#
+
+Section: ClassName - WaitEventAuth
+
+GSSAPI_ACCEPT_SEC_CONTEXT	"Waiting for a response from a Kerberos server via GSSAPI."
+LDAP_BIND	"Waiting for an LDAP bind operation to authenticate the user."
+LDAP_BIND_FOR_SEARCH	"Waiting for an LDAP bind operation to search the directory."
+LDAP_INITIALIZE	"Waiting to initialize an LDAP connection."
+LDAP_SEARCH	"Waiting for an LDAP search operation to complete."
+PAM_ACCT_MGMT	"Waiting for the local PAM service to validate the user account."
+PAM_AUTHENTICATE	"Waiting for the local PAM service to authenticate the user."
+RADIUS_TRANSACTION	"Waiting for a RADIUS transaction to complete."
+SSPI_ACCEPT_SECURITY_CONTEXT	"Waiting for a Windows security provider to accept the client's SSPI token."
+SSPI_ACQUIRE_CREDENTIALS_HANDLE	"Waiting for a Windows security provider to acquire server credentials for SSPI."
+SSPI_LOOKUP_ACCOUNT_SID	"Waiting for Windows to find the user's account SID."
+SSPI_MAKE_UPN	"Waiting for Windows to translate a Kerberos UPN."
+
+ABI_compatibility:
+
 #
 # Wait Events - Timeout
 #
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index 9f18a753d4..014d536441 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -25,6 +25,7 @@
 #define PG_WAIT_TIMEOUT				0x09000000U
 #define PG_WAIT_IO					0x0A000000U
 #define PG_WAIT_INJECTIONPOINT		0x0B000000U
+#define PG_WAIT_AUTH				0x0C000000U
 
 /* enums for wait events */
 #include "utils/wait_event_types.h"
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index fad7fc3a7e..030d1a8321 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -179,6 +179,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
    type    | ok 
 -----------+----
  Activity  | t
+ Auth      | t
  BufferPin | t
  Client    | t
  Extension | t
@@ -187,7 +188,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
  LWLock    | t
  Lock      | t
  Timeout   | t
-(9 rows)
+(10 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
-- 
2.34.1

