This patch adds counters and views for monitoring recovery conflicts
generated by hot standby. It extends the pg_stat_database view with one
column holding the total number of conflicts, and also creates a new
view, pg_stat_database_conflicts, that contains a breakdown of exactly
what caused the conflicts.

Documentation is still pending, but comments in the meantime are of course appreciated ;)

-- 
 Magnus Hagander
 Me: http://www.hagander.net/
 Work: http://www.redpill-linpro.com/
*** a/src/backend/catalog/system_views.sql
--- b/src/backend/catalog/system_views.sql
***************
*** 502,508 **** CREATE VIEW pg_stat_database AS
              pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched,
              pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted,
              pg_stat_get_db_tuples_updated(D.oid) AS tup_updated,
!             pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted
      FROM pg_database D;
  
  CREATE VIEW pg_stat_user_functions AS
--- 502,521 ----
              pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched,
              pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted,
              pg_stat_get_db_tuples_updated(D.oid) AS tup_updated,
!             pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted,
! 	    pg_stat_get_db_conflict_all(D.oid) AS conflicts
!     FROM pg_database D;
! 
! CREATE VIEW pg_stat_database_conflicts AS
!     SELECT
!             D.oid AS datid,
!             D.datname AS datname,
!             pg_stat_get_db_conflict_database(D.oid) AS confl_database,
!             pg_stat_get_db_conflict_tablespace(D.oid) AS confl_tablespace,
!             pg_stat_get_db_conflict_lock(D.oid) AS confl_lock,
!             pg_stat_get_db_conflict_snapshot(D.oid) AS confl_snapshot,
!             pg_stat_get_db_conflict_bufferpin(D.oid) AS confl_bufferpin,
!             pg_stat_get_db_conflict_startup_deadlock(D.oid) AS confl_deadlock
      FROM pg_database D;
  
  CREATE VIEW pg_stat_user_functions AS
*** a/src/backend/postmaster/pgstat.c
--- b/src/backend/postmaster/pgstat.c
***************
*** 57,62 ****
--- 57,63 ----
  #include "storage/ipc.h"
  #include "storage/pg_shmem.h"
  #include "storage/pmsignal.h"
+ #include "storage/procsignal.h"
  #include "utils/guc.h"
  #include "utils/memutils.h"
  #include "utils/ps_status.h"
***************
*** 278,283 **** static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len);
--- 279,285 ----
  static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len);
  static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len);
  static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len);
+ static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len);
  
  
  /* ------------------------------------------------------------
***************
*** 1314,1319 **** pgstat_report_analyze(Relation rel, bool adopt_counts,
--- 1316,1340 ----
  	pgstat_send(&msg, sizeof(msg));
  }
  
+ /* --------
+  * pgstat_report_recovery_conflict() -
+  *
+  *  Tell the collector about a Hot Standby recovery conflict.
+  * --------
+  */
+ void
+ pgstat_report_recovery_conflict(int reason)
+ {
+ 	PgStat_MsgRecoveryConflict msg;
+ 
+ 	if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts)
+ 		return;
+ 
+ 	pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT);
+ 	msg.m_databaseid = MyDatabaseId;
+ 	msg.m_reason = reason;
+ 	pgstat_send(&msg, sizeof(msg));
+ }
  
  /* ----------
   * pgstat_ping() -
***************
*** 3053,3058 **** PgstatCollectorMain(int argc, char *argv[])
--- 3074,3083 ----
  					pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len);
  					break;
  
+ 				case PGSTAT_MTYPE_RECOVERYCONFLICT:
+ 					pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len);
+ 					break;
+ 
  				default:
  					break;
  			}
***************
*** 3129,3134 **** pgstat_get_db_entry(Oid databaseid, bool create)
--- 3154,3165 ----
  		result->n_tuples_updated = 0;
  		result->n_tuples_deleted = 0;
  		result->last_autovac_time = 0;
+ 		result->n_conflict_database = 0;
+ 		result->n_conflict_tablespace = 0;
+ 		result->n_conflict_lock = 0;
+ 		result->n_conflict_snapshot = 0;
+ 		result->n_conflict_bufferpin = 0;
+ 		result->n_conflict_startup_deadlock = 0;
  
  		memset(&hash_ctl, 0, sizeof(hash_ctl));
  		hash_ctl.keysize = sizeof(Oid);
***************
*** 4204,4209 **** pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len)
--- 4235,4275 ----
  }
  
  /* ----------
+  * pgstat_recv_recoveryconflict() -
+  *
+  *  Process a RECOVERYCONFLICT message.
+  * ----------
+  */
+ static void
+ pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len)
+ {
+ 	PgStat_StatDBEntry *dbentry;
+ 	dbentry = pgstat_get_db_entry(msg->m_databaseid, true);
+ 
+ 	switch (msg->m_reason)
+ 	{
+ 		case PROCSIG_RECOVERY_CONFLICT_DATABASE:
+ 			dbentry->n_conflict_database++;
+ 			break;
+ 		case PROCSIG_RECOVERY_CONFLICT_TABLESPACE:
+ 			dbentry->n_conflict_tablespace++;
+ 			break;
+ 		case PROCSIG_RECOVERY_CONFLICT_LOCK:
+ 			dbentry->n_conflict_lock++;
+ 			break;
+ 		case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT:
+ 			dbentry->n_conflict_snapshot++;
+ 			break;
+ 		case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN:
+ 			dbentry->n_conflict_bufferpin++;
+ 			break;
+ 		case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK:
+ 			dbentry->n_conflict_startup_deadlock++;
+ 			break;
+ 	}
+ }
+ 
+ /* ----------
   * pgstat_recv_funcstat() -
   *
   *	Count what the backend has done.
*** a/src/backend/tcop/postgres.c
--- b/src/backend/tcop/postgres.c
***************
*** 2903,2917 **** ProcessInterrupts(void)
--- 2903,2923 ----
  					(errcode(ERRCODE_ADMIN_SHUTDOWN),
  					 errmsg("terminating autovacuum process due to administrator command")));
  		else if (RecoveryConflictPending && RecoveryConflictRetryable)
+ 		{
+ 			pgstat_report_recovery_conflict(RecoveryConflictReason);
  			ereport(FATAL,
  					(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
  			  errmsg("terminating connection due to conflict with recovery"),
  					 errdetail_recovery_conflict()));
+ 		}
  		else if (RecoveryConflictPending)
+ 		{
+ 			pgstat_report_recovery_conflict(RecoveryConflictReason);
  			ereport(FATAL,
  					(errcode(ERRCODE_ADMIN_SHUTDOWN),
  			  errmsg("terminating connection due to conflict with recovery"),
  					 errdetail_recovery_conflict()));
+ 		}
  		else
  			ereport(FATAL,
  					(errcode(ERRCODE_ADMIN_SHUTDOWN),
***************
*** 2956,2961 **** ProcessInterrupts(void)
--- 2962,2968 ----
  			RecoveryConflictPending = false;
  			DisableNotifyInterrupt();
  			DisableCatchupInterrupt();
+ 			pgstat_report_recovery_conflict(RecoveryConflictReason);
  			if (DoingCommandRead)
  				ereport(FATAL,
  						(errcode(ERRCODE_T_R_SERIALIZATION_FAILURE),
*** a/src/backend/utils/adt/pgstatfuncs.c
--- b/src/backend/utils/adt/pgstatfuncs.c
***************
*** 71,76 **** extern Datum pg_stat_get_db_tuples_fetched(PG_FUNCTION_ARGS);
--- 71,83 ----
  extern Datum pg_stat_get_db_tuples_inserted(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_db_tuples_updated(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS);
+ extern Datum pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS);
  
  extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS);
  extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS);
***************
*** 1130,1135 **** pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS)
--- 1137,1252 ----
  }
  
  Datum
+ pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_database);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_tablespace);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_lock);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_snapshot);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_bufferpin);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_startup_deadlock);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
+ pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS)
+ {
+ 	Oid			dbid = PG_GETARG_OID(0);
+ 	int64		result;
+ 	PgStat_StatDBEntry *dbentry;
+ 
+ 	if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL)
+ 		result = 0;
+ 	else
+ 		result = (int64) (dbentry->n_conflict_database +
+ 			dbentry->n_conflict_tablespace +
+ 			dbentry->n_conflict_lock +
+ 			dbentry->n_conflict_snapshot +
+ 			dbentry->n_conflict_bufferpin +
+ 			dbentry->n_conflict_startup_deadlock);
+ 
+ 	PG_RETURN_INT64(result);
+ }
+ 
+ Datum
  pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS)
  {
  	PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints);
*** a/src/include/catalog/pg_proc.h
--- b/src/include/catalog/pg_proc.h
***************
*** 3109,3114 **** DATA(insert OID = 2761 (  pg_stat_get_db_tuples_updated PGNSP PGUID 12 1 0 0 f f
--- 3109,3128 ----
  DESCR("statistics: tuples updated in database");
  DATA(insert OID = 2762 (  pg_stat_get_db_tuples_deleted PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_tuples_deleted _null_ _null_ _null_ ));
  DESCR("statistics: tuples deleted in database");
+ DATA(insert OID = 3065 (  pg_stat_get_db_conflict_database PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_database _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by drop database");
+ DATA(insert OID = 3066 (  pg_stat_get_db_conflict_tablespace PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_tablespace _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by drop tablespace");
+ DATA(insert OID = 3067 (  pg_stat_get_db_conflict_lock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_lock _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by relation lock");
+ DATA(insert OID = 3068 (  pg_stat_get_db_conflict_snapshot PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_snapshot _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by snapshot expiry");
+ DATA(insert OID = 3069 (  pg_stat_get_db_conflict_bufferpin PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_bufferpin _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by shared buffer pin");
+ DATA(insert OID = 3070 (  pg_stat_get_db_conflict_startup_deadlock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_startup_deadlock _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database caused by buffer deadlock");
+ DATA(insert OID = 3071 (  pg_stat_get_db_conflict_all PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_all _null_ _null_ _null_ ));
+ DESCR("statistics: recovery conflicts in database");
  DATA(insert OID = 2769 ( pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ ));
  DESCR("statistics: number of timed checkpoints started by the bgwriter");
  DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ ));
*** a/src/include/pgstat.h
--- b/src/include/pgstat.h
***************
*** 45,51 **** typedef enum StatMsgType
  	PGSTAT_MTYPE_ANALYZE,
  	PGSTAT_MTYPE_BGWRITER,
  	PGSTAT_MTYPE_FUNCSTAT,
! 	PGSTAT_MTYPE_FUNCPURGE
  } StatMsgType;
  
  /* ----------
--- 45,52 ----
  	PGSTAT_MTYPE_ANALYZE,
  	PGSTAT_MTYPE_BGWRITER,
  	PGSTAT_MTYPE_FUNCSTAT,
! 	PGSTAT_MTYPE_FUNCPURGE,
! 	PGSTAT_MTYPE_RECOVERYCONFLICT
  } StatMsgType;
  
  /* ----------
***************
*** 364,369 **** typedef struct PgStat_MsgBgWriter
--- 365,381 ----
  	PgStat_Counter m_buf_alloc;
  } PgStat_MsgBgWriter;
  
+ /* ----------
+  * PgStat_MsgRecoveryConflict	Sent by the backend upon recovery conflict
+  * ----------
+  */
+ typedef struct PgStat_MsgRecoveryConflict
+ {
+ 	PgStat_MsgHdr m_hdr;
+ 
+ 	Oid			m_databaseid;
+ 	int			m_reason;
+ } PgStat_MsgRecoveryConflict;
  
  /* ----------
   * PgStat_FunctionCounts	The actual per-function counts kept by a backend
***************
*** 460,465 **** typedef union PgStat_Msg
--- 472,478 ----
  	PgStat_MsgBgWriter msg_bgwriter;
  	PgStat_MsgFuncstat msg_funcstat;
  	PgStat_MsgFuncpurge msg_funcpurge;
+ 	PgStat_MsgRecoveryConflict msg_recoveryconflict;
  } PgStat_Msg;
  
  
***************
*** 490,495 **** typedef struct PgStat_StatDBEntry
--- 503,515 ----
  	PgStat_Counter n_tuples_updated;
  	PgStat_Counter n_tuples_deleted;
  	TimestampTz last_autovac_time;
+ 	PgStat_Counter n_conflict_database;
+ 	PgStat_Counter n_conflict_tablespace;
+ 	PgStat_Counter n_conflict_lock;
+ 	PgStat_Counter n_conflict_snapshot;
+ 	PgStat_Counter n_conflict_bufferpin;
+ 	PgStat_Counter n_conflict_startup_deadlock;
+ 
  
  	/*
  	 * tables and functions must be last in the struct, because we don't write
***************
*** 689,694 **** extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts,
--- 709,716 ----
  extern void pgstat_report_analyze(Relation rel, bool adopt_counts,
  					  PgStat_Counter livetuples, PgStat_Counter deadtuples);
  
+ extern void pgstat_report_recovery_conflict(int reason);
+ 
  extern void pgstat_initialize(void);
  extern void pgstat_bestart(void);
  
-- 
Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-hackers

Reply via email to