On Wed, Jan 5, 2022 at 12:13 PM Kyotaro Horiguchi <horikyota....@gmail.com> wrote: > > Here's the v2 patch. Please provide your thoughts. > > Thanks! I have three comments on this version.
Thanks for your review. > - I still think "acquire/release" logs on creation/dropping should be > silenced. Adding the third parameter to ReplicationSlotAcquire() > that can mute the acquiring (and as well as the corresponding > releasing) log will work. Done. > can piggy-back on log_replication_commands for the purpose, changing > its meaning slightly to "log replication commands and related > activity". > - Need to mute the logs by log_replication_commands. (We could add > another choice for the variable for this purpose but I think we > don't need it.) Done. > - The messages are not translatable as the variable parts are > adjectives. They need to consist of static sentences. The > combinations of the two properties are 6 (note that persistence is > tristate) but I don't think we accept that complexity for the > information. So I recommend to just remove the variable parts from > the messages. Done. Here's v3, please review it further. Regards, Bharath Rupireddy.
From 46792e7b33c15afeda83769655e2512674c33269 Mon Sep 17 00:00:00 2001 From: Bharath Rupireddy <bharath.rupireddyforpostgres@gmail.com> Date: Sun, 9 Jan 2022 09:38:36 +0000 Subject: [PATCH v3] Add LOG messages when replication slots become active and inactive These logs will be extremely useful on production servers to debug and analyze inactive replication slot issues. --- doc/src/sgml/config.sgml | 6 ++-- .../replication/logical/logicalfuncs.c | 4 +-- src/backend/replication/slot.c | 30 ++++++++++++++++--- src/backend/replication/slotfuncs.c | 10 +++---- src/backend/replication/walsender.c | 12 ++++---- src/backend/storage/lmgr/proc.c | 2 +- src/backend/tcop/postgres.c | 2 +- src/backend/utils/misc/guc.c | 2 +- src/include/replication/slot.h | 4 +-- 9 files changed, 47 insertions(+), 25 deletions(-) diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index afbb6c35e3..5f95fdffc8 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -7284,9 +7284,9 @@ log_line_prefix = '%m [%p] %q%u@%d/%a ' </term> <listitem> <para> - Causes each replication command to be logged in the server log. - See <xref linkend="protocol-replication"/> for more information about - replication command. The default value is <literal>off</literal>. + Causes each replication command and related activity to be logged in + the server log. See <xref linkend="protocol-replication"/> for more + information about replication command. The default value is <literal>off</literal>. Only superusers can change this setting. </para> </listitem> diff --git a/src/backend/replication/logical/logicalfuncs.c b/src/backend/replication/logical/logicalfuncs.c index 4f633888b4..ef19e48719 100644 --- a/src/backend/replication/logical/logicalfuncs.c +++ b/src/backend/replication/logical/logicalfuncs.c @@ -215,7 +215,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin else end_of_wal = GetXLogReplayRecPtr(NULL); - ReplicationSlotAcquire(NameStr(*name), true); + ReplicationSlotAcquire(NameStr(*name), true, true); PG_TRY(); { @@ -331,7 +331,7 @@ pg_logical_slot_get_changes_guts(FunctionCallInfo fcinfo, bool confirm, bool bin /* free context, call shutdown callback */ FreeDecodingContext(ctx); - ReplicationSlotRelease(); + ReplicationSlotRelease(true); InvalidateSystemCaches(); } PG_CATCH(); diff --git a/src/backend/replication/slot.c b/src/backend/replication/slot.c index e5e0cf8768..fbb02bb036 100644 --- a/src/backend/replication/slot.c +++ b/src/backend/replication/slot.c @@ -337,6 +337,11 @@ ReplicationSlotCreate(const char *name, bool db_specific, /* Let everybody know we've modified this slot */ ConditionVariableBroadcast(&slot->active_cv); + + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("created replication slot \"%s\"", + NameStr(slot->data.name)), + errhidestmt(true))); } /* @@ -377,7 +382,7 @@ SearchNamedReplicationSlot(const char *name, bool need_lock) * nowait is false, we sleep until the slot is released by the owning process. */ void -ReplicationSlotAcquire(const char *name, bool nowait) +ReplicationSlotAcquire(const char *name, bool nowait, bool msg_ok) { ReplicationSlot *s; int active_pid; @@ -457,6 +462,12 @@ retry: /* We made this slot active, so it's ours now. */ MyReplicationSlot = s; + + if (msg_ok) + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("acquired replication slot \"%s\"", + NameStr(s->data.name)), + errhidestmt(true))); } /* @@ -466,7 +477,7 @@ retry: * Resources this slot requires will be preserved. */ void -ReplicationSlotRelease(void) +ReplicationSlotRelease(bool msg_ok) { ReplicationSlot *slot = MyReplicationSlot; @@ -516,6 +527,12 @@ ReplicationSlotRelease(void) MyProc->statusFlags &= ~PROC_IN_LOGICAL_DECODING; ProcGlobal->statusFlags[MyProc->pgxactoff] = MyProc->statusFlags; LWLockRelease(ProcArrayLock); + + if (msg_ok) + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("released replication slot \"%s\"", + NameStr(slot->data.name)), + errhidestmt(true))); } /* @@ -564,7 +581,7 @@ ReplicationSlotDrop(const char *name, bool nowait) { Assert(MyReplicationSlot == NULL); - ReplicationSlotAcquire(name, nowait); + ReplicationSlotAcquire(name, nowait, false); ReplicationSlotDropAcquired(); } @@ -700,6 +717,11 @@ ReplicationSlotDropPtr(ReplicationSlot *slot) * a slot while we're still cleaning up the detritus of the old one. */ LWLockRelease(ReplicationSlotAllocationLock); + + ereport(log_replication_commands ? LOG : DEBUG1, + (errmsg("dropped replication slot \"%s\"", + NameStr(slot->data.name)), + errhidestmt(true))); } /* @@ -1288,7 +1310,7 @@ InvalidatePossiblyObsoleteSlot(ReplicationSlot *s, XLogRecPtr oldestLSN, /* Make sure the invalidated state persists across server restart */ ReplicationSlotMarkDirty(); ReplicationSlotSave(); - ReplicationSlotRelease(); + ReplicationSlotRelease(true); ereport(LOG, (errmsg("invalidating slot \"%s\" because its restart_lsn %X/%X exceeds max_slot_wal_keep_size", diff --git a/src/backend/replication/slotfuncs.c b/src/backend/replication/slotfuncs.c index ae6316d908..6737c1bb78 100644 --- a/src/backend/replication/slotfuncs.c +++ b/src/backend/replication/slotfuncs.c @@ -99,7 +99,7 @@ pg_create_physical_replication_slot(PG_FUNCTION_ARGS) tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); - ReplicationSlotRelease(); + ReplicationSlotRelease(false); PG_RETURN_DATUM(result); } @@ -201,7 +201,7 @@ pg_create_logical_replication_slot(PG_FUNCTION_ARGS) /* ok, slot is now fully created, mark it as persistent if needed */ if (!temporary) ReplicationSlotPersist(); - ReplicationSlotRelease(); + ReplicationSlotRelease(false); PG_RETURN_DATUM(result); } @@ -631,7 +631,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) moveto = Min(moveto, GetXLogReplayRecPtr(NULL)); /* Acquire the slot so we "own" it */ - ReplicationSlotAcquire(NameStr(*slotname), true); + ReplicationSlotAcquire(NameStr(*slotname), true, true); /* A slot whose restart_lsn has never been reserved cannot be advanced */ if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn)) @@ -674,7 +674,7 @@ pg_replication_slot_advance(PG_FUNCTION_ARGS) ReplicationSlotsComputeRequiredXmin(false); ReplicationSlotsComputeRequiredLSN(); - ReplicationSlotRelease(); + ReplicationSlotRelease(true); /* Return the reached position. */ values[1] = LSNGetDatum(endlsn); @@ -905,7 +905,7 @@ copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot) tuple = heap_form_tuple(tupdesc, values, nulls); result = HeapTupleGetDatum(tuple); - ReplicationSlotRelease(); + ReplicationSlotRelease(false); PG_RETURN_DATUM(result); } diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index 4cf95ce439..52ec13088e 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -304,7 +304,7 @@ WalSndErrorCleanup(void) wal_segment_close(xlogreader); if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + ReplicationSlotRelease(true); ReplicationSlotCleanup(); @@ -695,7 +695,7 @@ StartReplication(StartReplicationCmd *cmd) if (cmd->slotname) { - ReplicationSlotAcquire(cmd->slotname, true); + ReplicationSlotAcquire(cmd->slotname, true, true); if (SlotIsLogical(MyReplicationSlot)) ereport(ERROR, (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), @@ -841,7 +841,7 @@ StartReplication(StartReplicationCmd *cmd) } if (cmd->slotname) - ReplicationSlotRelease(); + ReplicationSlotRelease(true); /* * Copy is finished now. Send a single-row result set indicating the next @@ -1220,7 +1220,7 @@ CreateReplicationSlot(CreateReplicationSlotCmd *cmd) do_tup_output(tstate, values, nulls); end_tup_output(tstate); - ReplicationSlotRelease(); + ReplicationSlotRelease(false); } /* @@ -1247,7 +1247,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) Assert(!MyReplicationSlot); - ReplicationSlotAcquire(cmd->slotname, true); + ReplicationSlotAcquire(cmd->slotname, true, true); if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn)) ereport(ERROR, @@ -1316,7 +1316,7 @@ StartLogicalReplication(StartReplicationCmd *cmd) WalSndLoop(XLogSendLogical); FreeDecodingContext(logical_decoding_ctx); - ReplicationSlotRelease(); + ReplicationSlotRelease(true); replication_active = false; if (got_STOPPING) diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index e306b04738..cb9fe4dc6f 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -832,7 +832,7 @@ ProcKill(int code, Datum arg) /* Make sure active replication slots are released */ if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + ReplicationSlotRelease(true); /* Also cleanup all the temporary slots. */ ReplicationSlotCleanup(); diff --git a/src/backend/tcop/postgres.c b/src/backend/tcop/postgres.c index fda2e9360e..1bd192c03f 100644 --- a/src/backend/tcop/postgres.c +++ b/src/backend/tcop/postgres.c @@ -4265,7 +4265,7 @@ PostgresMain(const char *dbname, const char *username) * ensuring we'll correctly cleanup on FATAL errors as well. */ if (MyReplicationSlot != NULL) - ReplicationSlotRelease(); + ReplicationSlotRelease(true); /* We also want to cleanup temporary slots on error. */ ReplicationSlotCleanup(); diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6fc5cbc09a..90da7fa20e 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -1373,7 +1373,7 @@ static struct config_bool ConfigureNamesBool[] = }, { {"log_replication_commands", PGC_SUSET, LOGGING_WHAT, - gettext_noop("Logs each replication command."), + gettext_noop("Logs each replication command and related activity."), NULL }, &log_replication_commands, diff --git a/src/include/replication/slot.h b/src/include/replication/slot.h index f833b1d6dc..af8c7b5baa 100644 --- a/src/include/replication/slot.h +++ b/src/include/replication/slot.h @@ -199,8 +199,8 @@ extern void ReplicationSlotCreate(const char *name, bool db_specific, extern void ReplicationSlotPersist(void); extern void ReplicationSlotDrop(const char *name, bool nowait); -extern void ReplicationSlotAcquire(const char *name, bool nowait); -extern void ReplicationSlotRelease(void); +extern void ReplicationSlotAcquire(const char *name, bool nowait, bool msg_ok); +extern void ReplicationSlotRelease(bool msg_ok); extern void ReplicationSlotCleanup(void); extern void ReplicationSlotSave(void); extern void ReplicationSlotMarkDirty(void); -- 2.25.1