From 0dc8e12758a1e7bdde7e5e4e60570b8b4f75d240 Mon Sep 17 00:00:00 2001
From: Jacob Champion <jacob.champion@enterprisedb.com>
Date: Fri, 3 May 2024 15:58:23 -0700
Subject: [PATCH v11 1/2] Report external auth calls as wait events

Introduce a new "Auth" wait class for various external authentication
systems, to make it obvious what's going wrong if one of those systems
hangs. Each new wait event is unique in order to more easily pinpoint
problematic locations in the code.

Discussion: https://postgr.es/m/CAOYmi%2B%3D60deN20WDyCoHCiecgivJxr%3D98s7s7-C8SkXwrCfHXg%40mail.gmail.com
---
 doc/src/sgml/monitoring.sgml                  |  8 +++
 src/backend/libpq/auth.c                      | 56 +++++++++++++++++--
 src/backend/utils/activity/wait_event.c       | 11 ++++
 .../utils/activity/wait_event_names.txt       | 27 +++++++++
 src/include/utils/wait_event.h                |  1 +
 src/test/regress/expected/sysviews.out        |  3 +-
 6 files changed, 100 insertions(+), 6 deletions(-)

diff --git a/doc/src/sgml/monitoring.sgml b/doc/src/sgml/monitoring.sgml
index 16646f560e8..877163fb403 100644
--- a/doc/src/sgml/monitoring.sgml
+++ b/doc/src/sgml/monitoring.sgml
@@ -1045,6 +1045,14 @@ postgres   27093  0.0  0.0  30096  2752 ?        Ss   11:34   0:00 postgres: ser
        see <xref linkend="wait-event-activity-table"/>.
       </entry>
      </row>
+     <row>
+      <entry><literal>Auth</literal></entry>
+      <entry>The server process is waiting for an external system to
+       authenticate and/or authorize the client connection.
+       <literal>wait_event</literal> will identify the specific wait point;
+       see <xref linkend="wait-event-auth-table"/>.
+      </entry>
+     </row>
      <row>
       <entry><literal>BufferPin</literal></entry>
       <entry>The server process is waiting for exclusive access to
diff --git a/src/backend/libpq/auth.c b/src/backend/libpq/auth.c
index 81e2f8184e3..bd8a2a098b3 100644
--- a/src/backend/libpq/auth.c
+++ b/src/backend/libpq/auth.c
@@ -39,6 +39,7 @@
 #include "replication/walsender.h"
 #include "storage/ipc.h"
 #include "utils/memutils.h"
+#include "utils/wait_event.h"
 
 /*----------------------------------------------------------------
  * Global authentication functions
@@ -990,6 +991,7 @@ pg_GSS_recvauth(Port *port)
 		elog(DEBUG4, "processing received GSS token of length %u",
 			 (unsigned int) gbuf.length);
 
+		pgstat_report_wait_start(WAIT_EVENT_GSSAPI_ACCEPT_SEC_CONTEXT);
 		maj_stat = gss_accept_sec_context(&min_stat,
 										  &port->gss->ctx,
 										  port->gss->cred,
@@ -1001,6 +1003,7 @@ pg_GSS_recvauth(Port *port)
 										  &gflags,
 										  NULL,
 										  pg_gss_accept_delegation ? &delegated_creds : NULL);
+		pgstat_report_wait_end();
 
 		/* gbuf no longer used */
 		pfree(buf.data);
@@ -1212,6 +1215,7 @@ pg_SSPI_recvauth(Port *port)
 	/*
 	 * Acquire a handle to the server credentials.
 	 */
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_ACQUIRE_CREDENTIALS_HANDLE);
 	r = AcquireCredentialsHandle(NULL,
 								 "negotiate",
 								 SECPKG_CRED_INBOUND,
@@ -1221,6 +1225,8 @@ pg_SSPI_recvauth(Port *port)
 								 NULL,
 								 &sspicred,
 								 &expiry);
+	pgstat_report_wait_end();
+
 	if (r != SEC_E_OK)
 		pg_SSPI_error(ERROR, _("could not acquire SSPI credentials"), r);
 
@@ -1286,6 +1292,7 @@ pg_SSPI_recvauth(Port *port)
 		elog(DEBUG4, "processing received SSPI token of length %u",
 			 (unsigned int) buf.len);
 
+		pgstat_report_wait_start(WAIT_EVENT_SSPI_ACCEPT_SECURITY_CONTEXT);
 		r = AcceptSecurityContext(&sspicred,
 								  sspictx,
 								  &inbuf,
@@ -1295,6 +1302,7 @@ pg_SSPI_recvauth(Port *port)
 								  &outbuf,
 								  &contextattr,
 								  NULL);
+		pgstat_report_wait_end();
 
 		/* input buffer no longer used */
 		pfree(buf.data);
@@ -1392,11 +1400,13 @@ pg_SSPI_recvauth(Port *port)
 
 	CloseHandle(token);
 
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_LOOKUP_ACCOUNT_SID);
 	if (!LookupAccountSid(NULL, tokenuser->User.Sid, accountname, &accountnamesize,
 						  domainname, &domainnamesize, &accountnameuse))
 		ereport(ERROR,
 				(errmsg_internal("could not look up account SID: error code %lu",
 								 GetLastError())));
+	pgstat_report_wait_end();
 
 	free(tokenuser);
 
@@ -1493,8 +1503,11 @@ pg_SSPI_make_upn(char *accountname,
 	 */
 
 	samname = psprintf("%s\\%s", domainname, accountname);
+
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_TRANSLATE_NAME);
 	res = TranslateName(samname, NameSamCompatible, NameUserPrincipal,
 						NULL, &upnamesize);
+	pgstat_report_wait_end();
 
 	if ((!res && GetLastError() != ERROR_INSUFFICIENT_BUFFER)
 		|| upnamesize == 0)
@@ -1509,8 +1522,10 @@ pg_SSPI_make_upn(char *accountname,
 	/* upnamesize includes the terminating NUL. */
 	upname = palloc(upnamesize);
 
+	pgstat_report_wait_start(WAIT_EVENT_SSPI_TRANSLATE_NAME);
 	res = TranslateName(samname, NameSamCompatible, NameUserPrincipal,
 						upname, &upnamesize);
+	pgstat_report_wait_end();
 
 	pfree(samname);
 	if (res)
@@ -2109,7 +2124,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
 		return STATUS_ERROR;
 	}
 
+	pgstat_report_wait_start(WAIT_EVENT_PAM_AUTHENTICATE);
 	retval = pam_authenticate(pamh, 0);
+	pgstat_report_wait_end();
 
 	if (retval != PAM_SUCCESS)
 	{
@@ -2122,7 +2139,9 @@ CheckPAMAuth(Port *port, const char *user, const char *password)
 		return pam_no_password ? STATUS_EOF : STATUS_ERROR;
 	}
 
+	pgstat_report_wait_start(WAIT_EVENT_PAM_ACCT_MGMT);
 	retval = pam_acct_mgmt(pamh, 0);
+	pgstat_report_wait_end();
 
 	if (retval != PAM_SUCCESS)
 	{
@@ -2262,7 +2281,11 @@ InitializeLDAPConnection(Port *port, LDAP **ldap)
 			}
 
 			/* Look up a list of LDAP server hosts and port numbers */
-			if (ldap_domain2hostlist(domain, &hostlist))
+			pgstat_report_wait_start(WAIT_EVENT_LDAP_HOST_LOOKUP);
+			r = ldap_domain2hostlist(domain, &hostlist);
+			pgstat_report_wait_end();
+
+			if (r)
 			{
 				ereport(LOG,
 						(errmsg("LDAP authentication could not find DNS SRV records for \"%s\"",
@@ -2356,11 +2379,15 @@ InitializeLDAPConnection(Port *port, LDAP **ldap)
 
 	if (port->hba->ldaptls)
 	{
+		pgstat_report_wait_start(WAIT_EVENT_LDAP_START_TLS);
 #ifndef WIN32
-		if ((r = ldap_start_tls_s(*ldap, NULL, NULL)) != LDAP_SUCCESS)
+		r = ldap_start_tls_s(*ldap, NULL, NULL);
 #else
-		if ((r = ldap_start_tls_s(*ldap, NULL, NULL, NULL, NULL)) != LDAP_SUCCESS)
+		r = ldap_start_tls_s(*ldap, NULL, NULL, NULL, NULL);
 #endif
+		pgstat_report_wait_end();
+
+		if (r != LDAP_SUCCESS)
 		{
 			ereport(LOG,
 					(errmsg("could not start LDAP TLS session: %s",
@@ -2520,9 +2547,12 @@ CheckLDAPAuth(Port *port)
 		 * Bind with a pre-defined username/password (if available) for
 		 * searching. If none is specified, this turns into an anonymous bind.
 		 */
+		pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND_FOR_SEARCH);
 		r = ldap_simple_bind_s(ldap,
 							   port->hba->ldapbinddn ? port->hba->ldapbinddn : "",
 							   port->hba->ldapbindpasswd ? ldap_password_hook(port->hba->ldapbindpasswd) : "");
+		pgstat_report_wait_end();
+
 		if (r != LDAP_SUCCESS)
 		{
 			ereport(LOG,
@@ -2545,6 +2575,8 @@ CheckLDAPAuth(Port *port)
 			filter = psprintf("(uid=%s)", port->user_name);
 
 		search_message = NULL;
+
+		pgstat_report_wait_start(WAIT_EVENT_LDAP_SEARCH);
 		r = ldap_search_s(ldap,
 						  port->hba->ldapbasedn,
 						  port->hba->ldapscope,
@@ -2552,6 +2584,7 @@ CheckLDAPAuth(Port *port)
 						  attributes,
 						  0,
 						  &search_message);
+		pgstat_report_wait_end();
 
 		if (r != LDAP_SUCCESS)
 		{
@@ -2620,7 +2653,9 @@ CheckLDAPAuth(Port *port)
 							port->user_name,
 							port->hba->ldapsuffix ? port->hba->ldapsuffix : "");
 
+	pgstat_report_wait_start(WAIT_EVENT_LDAP_BIND);
 	r = ldap_simple_bind_s(ldap, fulluser, passwd);
+	pgstat_report_wait_end();
 
 	if (r != LDAP_SUCCESS)
 	{
@@ -3069,8 +3104,12 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
 		return STATUS_ERROR;
 	}
 
-	if (sendto(sock, radius_buffer, packetlength, 0,
-			   serveraddrs[0].ai_addr, serveraddrs[0].ai_addrlen) < 0)
+	pgstat_report_wait_start(WAIT_EVENT_RADIUS_SENDTO);
+	r = sendto(sock, radius_buffer, packetlength, 0,
+			   serveraddrs[0].ai_addr, serveraddrs[0].ai_addrlen);
+	pgstat_report_wait_end();
+
+	if (r < 0)
 	{
 		ereport(LOG,
 				(errmsg("could not send RADIUS packet: %m")));
@@ -3118,7 +3157,10 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
 		FD_ZERO(&fdset);
 		FD_SET(sock, &fdset);
 
+		pgstat_report_wait_start(WAIT_EVENT_RADIUS_WAIT);
 		r = select(sock + 1, &fdset, NULL, NULL, &timeout);
+		pgstat_report_wait_end();
+
 		if (r < 0)
 		{
 			if (errno == EINTR)
@@ -3151,8 +3193,12 @@ PerformRadiusTransaction(const char *server, const char *secret, const char *por
 		 */
 
 		addrsize = sizeof(remoteaddr);
+
+		pgstat_report_wait_start(WAIT_EVENT_RADIUS_RECVFROM);
 		packetlength = recvfrom(sock, receive_buffer, RADIUS_BUFFER_SIZE, 0,
 								(struct sockaddr *) &remoteaddr, &addrsize);
+		pgstat_report_wait_end();
+
 		if (packetlength < 0)
 		{
 			ereport(LOG,
diff --git a/src/backend/utils/activity/wait_event.c b/src/backend/utils/activity/wait_event.c
index d9b8f34a355..6cc3e1e7c7a 100644
--- a/src/backend/utils/activity/wait_event.c
+++ b/src/backend/utils/activity/wait_event.c
@@ -34,6 +34,7 @@ static const char *pgstat_get_wait_client(WaitEventClient w);
 static const char *pgstat_get_wait_ipc(WaitEventIPC w);
 static const char *pgstat_get_wait_timeout(WaitEventTimeout w);
 static const char *pgstat_get_wait_io(WaitEventIO w);
+static const char *pgstat_get_wait_auth(WaitEventAuth w);
 
 
 static uint32 local_my_wait_event_info;
@@ -413,6 +414,9 @@ pgstat_get_wait_event_type(uint32 wait_event_info)
 		case PG_WAIT_INJECTIONPOINT:
 			event_type = "InjectionPoint";
 			break;
+		case PG_WAIT_AUTH:
+			event_type = "Auth";
+			break;
 		default:
 			event_type = "???";
 			break;
@@ -495,6 +499,13 @@ pgstat_get_wait_event(uint32 wait_event_info)
 				event_name = pgstat_get_wait_io(w);
 				break;
 			}
+		case PG_WAIT_AUTH:
+			{
+				WaitEventAuth w = (WaitEventAuth) wait_event_info;
+
+				event_name = pgstat_get_wait_auth(w);
+				break;
+			}
 		default:
 			event_name = "unknown wait event";
 			break;
diff --git a/src/backend/utils/activity/wait_event_names.txt b/src/backend/utils/activity/wait_event_names.txt
index e199f071628..a2852225614 100644
--- a/src/backend/utils/activity/wait_event_names.txt
+++ b/src/backend/utils/activity/wait_event_names.txt
@@ -162,6 +162,33 @@ XACT_GROUP_UPDATE	"Waiting for the group leader to update transaction status at
 
 ABI_compatibility:
 
+#
+# Wait Events - Auth
+#
+# Use this category when a process is waiting for a third party to
+# authenticate/authorize the user.
+#
+
+Section: ClassName - WaitEventAuth
+
+GSSAPI_ACCEPT_SEC_CONTEXT	"Waiting for a response from a Kerberos server via GSSAPI."
+LDAP_BIND	"Waiting for an LDAP bind operation to authenticate the user."
+LDAP_BIND_FOR_SEARCH	"Waiting for an LDAP bind operation to search the directory."
+LDAP_HOST_LOOKUP	"Waiting for DNS lookup of LDAP servers."
+LDAP_SEARCH	"Waiting for an LDAP search operation to complete."
+LDAP_START_TLS	"Waiting for an LDAP StartTLS exchange."
+PAM_ACCT_MGMT	"Waiting for the PAM service to validate the user account."
+PAM_AUTHENTICATE	"Waiting for the PAM service to authenticate the user."
+RADIUS_RECVFROM	"Waiting for a <function>recvfrom</function> call during a RADIUS transaction."
+RADIUS_SENDTO	"Waiting for a <function>sendto</function> call during a RADIUS transaction."
+RADIUS_WAIT	"Waiting for a RADIUS server to respond."
+SSPI_ACCEPT_SECURITY_CONTEXT	"Waiting for a Windows security provider to accept the client's SSPI token."
+SSPI_ACQUIRE_CREDENTIALS_HANDLE	"Waiting for a Windows security provider to acquire server credentials for SSPI."
+SSPI_LOOKUP_ACCOUNT_SID	"Waiting for Windows to find the user's security identifier."
+SSPI_TRANSLATE_NAME	"Waiting for Windows to translate a Kerberos UPN."
+
+ABI_compatibility:
+
 #
 # Wait Events - Timeout
 #
diff --git a/src/include/utils/wait_event.h b/src/include/utils/wait_event.h
index b8cb3e5a430..3d995a9e5be 100644
--- a/src/include/utils/wait_event.h
+++ b/src/include/utils/wait_event.h
@@ -25,6 +25,7 @@
 #define PG_WAIT_TIMEOUT				0x09000000U
 #define PG_WAIT_IO					0x0A000000U
 #define PG_WAIT_INJECTIONPOINT		0x0B000000U
+#define PG_WAIT_AUTH				0x0C000000U
 
 /* enums for wait events */
 #include "utils/wait_event_types.h"
diff --git a/src/test/regress/expected/sysviews.out b/src/test/regress/expected/sysviews.out
index 83228cfca29..b94930658b0 100644
--- a/src/test/regress/expected/sysviews.out
+++ b/src/test/regress/expected/sysviews.out
@@ -181,6 +181,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
    type    | ok 
 -----------+----
  Activity  | t
+ Auth      | t
  BufferPin | t
  Client    | t
  Extension | t
@@ -189,7 +190,7 @@ select type, count(*) > 0 as ok FROM pg_wait_events
  LWLock    | t
  Lock      | t
  Timeout   | t
-(9 rows)
+(10 rows)
 
 -- Test that the pg_timezone_names and pg_timezone_abbrevs views are
 -- more-or-less working.  We can't test their contents in any great detail
-- 
2.34.1

