Hello. Although replication slots are helpful for avoiding unwanted WAL deletion, they can also cause a disastrous situation by retaining WAL segments without limit. Removing the culprit replication slot resolves the situation, but that is not possible while the standby is active. We would have to take rather complex and forcible steps to relieve the situation, especially in an automated manner. (In my case, specifically in an HA cluster.)
This patch adds a GUC that puts a limit on the number of segments that replication slots can keep. Hitting the limit during a checkpoint shows a warning, and the segments older than the limit are removed. > WARNING: restart LSN of replication slots is ignored by checkpoint > DETAIL: Some replication slots lose required WAL segnents to continue. Another measure would be automatic deletion or inactivation of the culprit slot, but that seems too complex for the problem. As we have already postponed some patches in the triage for the last commit fest, this should probably be postponed to PG11. regards, -- Kyotaro Horiguchi NTT Open Source Software Center
>From 367205d51ef471defd0f9df76840dee1c4cd4036 Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyot...@lab.ntt.co.jp> Date: Tue, 28 Feb 2017 11:39:48 +0900 Subject: [PATCH] Add WAL releaf vent for replication slots Adds a capability to limit the number of segments kept by replication slots by a GUC variable. --- src/backend/access/transam/xlog.c | 12 ++++++++++++ src/backend/utils/misc/guc.c | 10 ++++++++++ src/backend/utils/misc/postgresql.conf.sample | 1 + src/include/access/xlog.h | 1 + 4 files changed, 24 insertions(+) diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 5016273..6c57e99 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -104,6 +104,7 @@ int wal_level = WAL_LEVEL_MINIMAL; int CommitDelay = 0; /* precommit delay in microseconds */ int CommitSiblings = 5; /* # concurrent xacts needed to sleep */ int wal_retrieve_retry_interval = 5000; +int max_slot_wal_keep_segments = 0; #ifdef WAL_DEBUG bool XLOG_DEBUG = false; @@ -9267,6 +9268,17 @@ KeepLogSeg(XLogRecPtr recptr, XLogSegNo *logSegNo) XLByteToSeg(keep, slotSegNo); + /* emergency vent */ + if (max_slot_wal_keep_segments > 0 && + slotSegNo < segno - max_slot_wal_keep_segments) + { + ereport(WARNING, + (errmsg ("restart LSN of replication slots is ignored by checkpoint"), + errdetail("Some replication slots lose required WAL segnents to continue."))); + /* slotSegNo cannot be negative here */ + slotSegNo = segno - max_slot_wal_keep_segments; + } + if (slotSegNo <= 0) segno = 1; else if (slotSegNo < segno) diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 0707f66..4ff1c2a 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2335,6 +2335,16 @@ static struct config_int ConfigureNamesInt[] = }, { + {"max_slot_wal_keep_segments", PGC_SIGHUP, REPLICATION_SENDING, + gettext_noop("Sets the maximum keep segments by replication slots."), + NULL + }, + 
&max_slot_wal_keep_segments, + 0, 0, INT_MAX, + NULL, NULL, NULL + }, + + { {"wal_sender_timeout", PGC_SIGHUP, REPLICATION_SENDING, gettext_noop("Sets the maximum time to wait for WAL replication."), NULL, diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample index 157d775..7424a63 100644 --- a/src/backend/utils/misc/postgresql.conf.sample +++ b/src/backend/utils/misc/postgresql.conf.sample @@ -233,6 +233,7 @@ #max_wal_senders = 10 # max number of walsender processes # (change requires restart) #wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables +#max_slot_wal_keep_segments = 0 # in logfile segments, 16MB each; 0 disables #wal_sender_timeout = 60s # in milliseconds; 0 disables #max_replication_slots = 10 # max number of replication slots diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h index 9f036c7..fae1b87 100644 --- a/src/include/access/xlog.h +++ b/src/include/access/xlog.h @@ -97,6 +97,7 @@ extern bool reachedConsistency; extern int min_wal_size; extern int max_wal_size; extern int wal_keep_segments; +extern int max_slot_wal_keep_segments; extern int XLOGbuffers; extern int XLogArchiveTimeout; extern int wal_retrieve_retry_interval; -- 2.9.2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers