This patch adds counters and views to monitor recovery conflicts generated by Hot Standby. It extends the pg_stat_database view with one column holding the total number of conflicts, and also creates a new view, pg_stat_database_conflicts, that contains a breakdown of exactly what caused the conflicts.
Documentation is still pending, but comments in the meantime are of course appreciated ;) -- Magnus Hagander Me: http://www.hagander.net/ Work: http://www.redpill-linpro.com/
*** a/src/backend/catalog/system_views.sql --- b/src/backend/catalog/system_views.sql *************** *** 502,508 **** CREATE VIEW pg_stat_database AS pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(D.oid) AS tup_updated, ! pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted FROM pg_database D; CREATE VIEW pg_stat_user_functions AS --- 502,521 ---- pg_stat_get_db_tuples_fetched(D.oid) AS tup_fetched, pg_stat_get_db_tuples_inserted(D.oid) AS tup_inserted, pg_stat_get_db_tuples_updated(D.oid) AS tup_updated, ! pg_stat_get_db_tuples_deleted(D.oid) AS tup_deleted, ! pg_stat_get_db_conflict_all(D.oid) AS conflicts ! FROM pg_database D; ! ! CREATE VIEW pg_stat_database_conflicts AS ! SELECT ! D.oid AS datid, ! D.datname AS datname, ! pg_stat_get_db_conflict_database(D.oid) AS confl_database, ! pg_stat_get_db_conflict_tablespace(D.oid) AS confl_tablespace, ! pg_stat_get_db_conflict_lock(D.oid) AS confl_lock, ! pg_stat_get_db_conflict_snapshot(D.oid) AS confl_snapshot, ! pg_stat_get_db_conflict_bufferpin(D.oid) AS confl_bufferpin, ! 
pg_stat_get_db_conflict_startup_deadlock(D.oid) AS confl_deadlock FROM pg_database D; CREATE VIEW pg_stat_user_functions AS *** a/src/backend/postmaster/pgstat.c --- b/src/backend/postmaster/pgstat.c *************** *** 57,62 **** --- 57,63 ---- #include "storage/ipc.h" #include "storage/pg_shmem.h" #include "storage/pmsignal.h" + #include "storage/procsignal.h" #include "utils/guc.h" #include "utils/memutils.h" #include "utils/ps_status.h" *************** *** 278,283 **** static void pgstat_recv_analyze(PgStat_MsgAnalyze *msg, int len); --- 279,285 ---- static void pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len); static void pgstat_recv_funcstat(PgStat_MsgFuncstat *msg, int len); static void pgstat_recv_funcpurge(PgStat_MsgFuncpurge *msg, int len); + static void pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len); /* ------------------------------------------------------------ *************** *** 1314,1319 **** pgstat_report_analyze(Relation rel, bool adopt_counts, --- 1316,1340 ---- pgstat_send(&msg, sizeof(msg)); } + /* -------- + * pgstat_report_recovery_conflict() - + * + * Tell the collector about a Hot Standby recovery conflict. 
+ * -------- + */ + void + pgstat_report_recovery_conflict(int reason) + { + PgStat_MsgRecoveryConflict msg; + + if (pgStatSock == PGINVALID_SOCKET || !pgstat_track_counts) + return; + + pgstat_setheader(&msg.m_hdr, PGSTAT_MTYPE_RECOVERYCONFLICT); + msg.m_databaseid = MyDatabaseId; + msg.m_reason = reason; + pgstat_send(&msg, sizeof(msg)); + } /* ---------- * pgstat_ping() - *************** *** 3053,3058 **** PgstatCollectorMain(int argc, char *argv[]) --- 3074,3083 ---- pgstat_recv_funcpurge((PgStat_MsgFuncpurge *) &msg, len); break; + case PGSTAT_MTYPE_RECOVERYCONFLICT: + pgstat_recv_recoveryconflict((PgStat_MsgRecoveryConflict *) &msg, len); + break; + default: break; } *************** *** 3129,3134 **** pgstat_get_db_entry(Oid databaseid, bool create) --- 3154,3165 ---- result->n_tuples_updated = 0; result->n_tuples_deleted = 0; result->last_autovac_time = 0; + result->n_conflict_database = 0; + result->n_conflict_tablespace = 0; + result->n_conflict_lock = 0; + result->n_conflict_snapshot = 0; + result->n_conflict_bufferpin = 0; + result->n_conflict_startup_deadlock = 0; memset(&hash_ctl, 0, sizeof(hash_ctl)); hash_ctl.keysize = sizeof(Oid); *************** *** 4204,4209 **** pgstat_recv_bgwriter(PgStat_MsgBgWriter *msg, int len) --- 4235,4275 ---- } /* ---------- + * pgstat_recv_recoveryconflict() - + * + * Process a RECOVERYCONFLICT message. 
+ * ---------- + */ + static void + pgstat_recv_recoveryconflict(PgStat_MsgRecoveryConflict *msg, int len) + { + PgStat_StatDBEntry *dbentry; + dbentry = pgstat_get_db_entry(msg->m_databaseid, true); + + switch (msg->m_reason) + { + case PROCSIG_RECOVERY_CONFLICT_DATABASE: + dbentry->n_conflict_database++; + break; + case PROCSIG_RECOVERY_CONFLICT_TABLESPACE: + dbentry->n_conflict_tablespace++; + break; + case PROCSIG_RECOVERY_CONFLICT_LOCK: + dbentry->n_conflict_lock++; + break; + case PROCSIG_RECOVERY_CONFLICT_SNAPSHOT: + dbentry->n_conflict_snapshot++; + break; + case PROCSIG_RECOVERY_CONFLICT_BUFFERPIN: + dbentry->n_conflict_bufferpin++; + break; + case PROCSIG_RECOVERY_CONFLICT_STARTUP_DEADLOCK: + dbentry->n_conflict_startup_deadlock++; + break; + } + } + + /* ---------- * pgstat_recv_funcstat() - * * Count what the backend has done. *** a/src/backend/tcop/postgres.c --- b/src/backend/tcop/postgres.c *************** *** 2903,2917 **** ProcessInterrupts(void) --- 2903,2923 ---- (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating autovacuum process due to administrator command"))); else if (RecoveryConflictPending && RecoveryConflictRetryable) + { + pgstat_report_recovery_conflict(RecoveryConflictReason); ereport(FATAL, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), errmsg("terminating connection due to conflict with recovery"), errdetail_recovery_conflict())); + } else if (RecoveryConflictPending) + { + pgstat_report_recovery_conflict(RecoveryConflictReason); ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), errmsg("terminating connection due to conflict with recovery"), errdetail_recovery_conflict())); + } else ereport(FATAL, (errcode(ERRCODE_ADMIN_SHUTDOWN), *************** *** 2956,2961 **** ProcessInterrupts(void) --- 2962,2968 ---- RecoveryConflictPending = false; DisableNotifyInterrupt(); DisableCatchupInterrupt(); + pgstat_report_recovery_conflict(RecoveryConflictReason); if (DoingCommandRead) ereport(FATAL, (errcode(ERRCODE_T_R_SERIALIZATION_FAILURE), 
*** a/src/backend/utils/adt/pgstatfuncs.c --- b/src/backend/utils/adt/pgstatfuncs.c *************** *** 71,76 **** extern Datum pg_stat_get_db_tuples_fetched(PG_FUNCTION_ARGS); --- 71,83 ---- extern Datum pg_stat_get_db_tuples_inserted(PG_FUNCTION_ARGS); extern Datum pg_stat_get_db_tuples_updated(PG_FUNCTION_ARGS); extern Datum pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS); + extern Datum pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS); extern Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS); extern Datum pg_stat_get_bgwriter_requested_checkpoints(PG_FUNCTION_ARGS); *************** *** 1130,1135 **** pg_stat_get_db_tuples_deleted(PG_FUNCTION_ARGS) --- 1137,1252 ---- } Datum + pg_stat_get_db_conflict_database(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_database); + + PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_tablespace(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_tablespace); + + PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_lock(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_lock); + + 
PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_snapshot(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_snapshot); + + PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_bufferpin(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_bufferpin); + + PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_startup_deadlock(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_startup_deadlock); + + PG_RETURN_INT64(result); + } + + Datum + pg_stat_get_db_conflict_all(PG_FUNCTION_ARGS) + { + Oid dbid = PG_GETARG_OID(0); + int64 result; + PgStat_StatDBEntry *dbentry; + + if ((dbentry = pgstat_fetch_stat_dbentry(dbid)) == NULL) + result = 0; + else + result = (int64) (dbentry->n_conflict_database + + dbentry->n_conflict_tablespace + + dbentry->n_conflict_lock + + dbentry->n_conflict_snapshot + + dbentry->n_conflict_bufferpin + + dbentry->n_conflict_startup_deadlock); + + PG_RETURN_INT64(result); + } + + Datum pg_stat_get_bgwriter_timed_checkpoints(PG_FUNCTION_ARGS) { PG_RETURN_INT64(pgstat_fetch_global()->timed_checkpoints); *** a/src/include/catalog/pg_proc.h --- b/src/include/catalog/pg_proc.h *************** *** 3109,3114 **** DATA(insert OID = 2761 ( pg_stat_get_db_tuples_updated PGNSP PGUID 12 1 0 0 f f --- 3109,3128 ---- DESCR("statistics: tuples updated in database"); DATA(insert OID = 2762 ( pg_stat_get_db_tuples_deleted PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_tuples_deleted 
_null_ _null_ _null_ )); DESCR("statistics: tuples deleted in database"); + DATA(insert OID = 3065 ( pg_stat_get_db_conflict_database PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_database _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by drop database"); + DATA(insert OID = 3066 ( pg_stat_get_db_conflict_tablespace PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_tablespace _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by drop tablespace"); + DATA(insert OID = 3067 ( pg_stat_get_db_conflict_lock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_lock _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by relation lock"); + DATA(insert OID = 3068 ( pg_stat_get_db_conflict_snapshot PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_snapshot _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by snapshot expiry"); + DATA(insert OID = 3069 ( pg_stat_get_db_conflict_bufferpin PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_bufferpin _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by shared buffer pin"); + DATA(insert OID = 3070 ( pg_stat_get_db_conflict_startup_deadlock PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_startup_deadlock _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database caused by buffer deadlock"); + DATA(insert OID = 3071 ( pg_stat_get_db_conflict_all PGNSP PGUID 12 1 0 0 f f f t f s 1 0 20 "26" _null_ _null_ _null_ _null_ pg_stat_get_db_conflict_all _null_ _null_ _null_ )); + DESCR("statistics: recovery conflicts in database"); DATA(insert OID = 2769 ( 
pg_stat_get_bgwriter_timed_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_timed_checkpoints _null_ _null_ _null_ )); DESCR("statistics: number of timed checkpoints started by the bgwriter"); DATA(insert OID = 2770 ( pg_stat_get_bgwriter_requested_checkpoints PGNSP PGUID 12 1 0 0 f f f t f s 0 0 20 "" _null_ _null_ _null_ _null_ pg_stat_get_bgwriter_requested_checkpoints _null_ _null_ _null_ )); *** a/src/include/pgstat.h --- b/src/include/pgstat.h *************** *** 45,51 **** typedef enum StatMsgType PGSTAT_MTYPE_ANALYZE, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_FUNCSTAT, ! PGSTAT_MTYPE_FUNCPURGE } StatMsgType; /* ---------- --- 45,52 ---- PGSTAT_MTYPE_ANALYZE, PGSTAT_MTYPE_BGWRITER, PGSTAT_MTYPE_FUNCSTAT, ! PGSTAT_MTYPE_FUNCPURGE, ! PGSTAT_MTYPE_RECOVERYCONFLICT } StatMsgType; /* ---------- *************** *** 364,369 **** typedef struct PgStat_MsgBgWriter --- 365,381 ---- PgStat_Counter m_buf_alloc; } PgStat_MsgBgWriter; + /* ---------- + * PgStat_MsgRecoveryConflict Sent by the backend upon recovery conflict + * ---------- + */ + typedef struct PgStat_MsgRecoveryConflict + { + PgStat_MsgHdr m_hdr; + + Oid m_databaseid; + int m_reason; + } PgStat_MsgRecoveryConflict; /* ---------- * PgStat_FunctionCounts The actual per-function counts kept by a backend *************** *** 460,465 **** typedef union PgStat_Msg --- 472,478 ---- PgStat_MsgBgWriter msg_bgwriter; PgStat_MsgFuncstat msg_funcstat; PgStat_MsgFuncpurge msg_funcpurge; + PgStat_MsgRecoveryConflict msg_recoveryconflict; } PgStat_Msg; *************** *** 490,495 **** typedef struct PgStat_StatDBEntry --- 503,515 ---- PgStat_Counter n_tuples_updated; PgStat_Counter n_tuples_deleted; TimestampTz last_autovac_time; + PgStat_Counter n_conflict_database; + PgStat_Counter n_conflict_tablespace; + PgStat_Counter n_conflict_lock; + PgStat_Counter n_conflict_snapshot; + PgStat_Counter n_conflict_bufferpin; + PgStat_Counter n_conflict_startup_deadlock; + /* * tables 
and functions must be last in the struct, because we don't write *************** *** 689,694 **** extern void pgstat_report_vacuum(Oid tableoid, bool shared, bool adopt_counts, --- 709,716 ---- extern void pgstat_report_analyze(Relation rel, bool adopt_counts, PgStat_Counter livetuples, PgStat_Counter deadtuples); + extern void pgstat_report_recovery_conflict(int reason); + extern void pgstat_initialize(void); extern void pgstat_bestart(void);
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers