Hello
Thank you for review!
> - This parameter can only be set at server start.
> + This parameter can only be set in the <filename>postgresql.conf</filename>
> + file or on the server command line.
>
> I'm not sure it's good to change the context of this
> description. This was mentioning that changing of this parameter
> requires server (re)start. So if we want to be on the same
> context after rewriting, it would be like "This parameter can be
> set any time and causes WAL receiver restart with the new setting
> if the server is in standby mode."
Was written such way after this review:
https://www.postgresql.org/message-id/20181125214313.lydvmrraqjfrb3s2%40alap3.anarazel.de
> And If I'm not missing something, I don't find an (explict)
> paramter of postmaster for setting primary_conninfo.
Well, we have common -c option: -c name=value
> Couldn't we do the same thing by just skipping the wait and
> setting lastSourceFailed to true in the case of intentional
> walreceiver restart?
Yes, it's possible. Let's see... Done in attached variant.
We need check pendingWalRcvRestart before rescanLatestTimeLine lines.
regards, Sergei
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 6612f95f9f..1fa48058e8 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3929,9 +3929,15 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
<varname>primary_conninfo</varname> string.
</para>
<para>
- This parameter can only be set at server start.
+ This parameter can only be set in the <filename>postgresql.conf</filename>
+ file or on the server command line.
This setting has no effect if the server is not in standby mode.
</para>
+ <para>
+ If <varname>primary_conninfo</varname> is changed while WAL receiver is running,
+ the WAL receiver shuts down and then restarts with new setting,
+ except when primary_conninfo is an empty string.
+ </para>
</listitem>
</varlistentry>
<varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3946,9 +3952,13 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
connecting to the sending server via streaming replication to control
resource removal on the upstream node
(see <xref linkend="streaming-replication-slots"/>).
- This parameter can only be set at server start.
+ This parameter can only be set in the <filename>postgresql.conf</filename>
+ file or on the server command line.
This setting has no effect if <varname>primary_conninfo</varname> is not
- set.
+ set or the server is not in standby mode.
+ </para>
+ <para>
+ The WAL receiver is restarted after an update of <varname>primary_slot_name</varname>.
</para>
</listitem>
</varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index b7ff004234..f0e47cc663 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -803,6 +803,12 @@ static XLogSource readSource = 0; /* XLOG_FROM_* code */
static XLogSource currentSource = 0; /* XLOG_FROM_* code */
static bool lastSourceFailed = false;
+/*
+ * Need for restart running WalReceiver due the configuration change.
+ * Suitable only for XLOG_FROM_STREAM source
+ */
+static bool pendingWalRcvRestart = false;
+
typedef struct XLogPageReadPrivate
{
int emode;
@@ -11756,12 +11762,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
int oldSource = currentSource;
/*
- * First check if we failed to read from the current source, and
+ * First check if we failed to read from the current source or if
+ * we want to restart wal receiver, and
* advance the state machine if so. The failure to read might've
* happened outside this function, e.g when a CRC check fails on a
* record, or within this loop.
*/
- if (lastSourceFailed)
+ if (lastSourceFailed || pendingWalRcvRestart)
{
switch (currentSource)
{
@@ -11862,6 +11869,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
if (WalRcvStreaming())
ShutdownWalRcv();
+ /*
+ * If wal receiver is requested to restart, we skip the
+ * next XLOG_FROM_ARCHIVE to immediately starting it.
+ */
+ if (pendingWalRcvRestart)
+ {
+ lastSourceFailed = true;
+ currentSource = XLOG_FROM_ARCHIVE;
+ continue;
+ }
+
/*
* Before we sleep, re-scan for possible new timelines if
* we were requested to recover to the latest timeline.
@@ -11881,7 +11899,8 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
* obtaining the requested WAL. We're going to loop back
* and retry from the archive, but if it hasn't been long
* since last attempt, sleep wal_retrieve_retry_interval
- * milliseconds to avoid busy-waiting.
+ * milliseconds to avoid busy-waiting. We don't wait if
+ * explicitly requested to restart.
*/
now = GetCurrentTimestamp();
if (!TimestampDifferenceExceeds(last_fail_time, now,
@@ -11922,16 +11941,17 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
currentSource = XLOG_FROM_ARCHIVE;
}
- if (currentSource != oldSource)
+ if (currentSource != oldSource && !pendingWalRcvRestart)
elog(DEBUG2, "switched WAL source from %s to %s after %s",
xlogSourceNames[oldSource], xlogSourceNames[currentSource],
lastSourceFailed ? "failure" : "success");
/*
- * We've now handled possible failure. Try to read from the chosen
- * source.
+ * We've now handled possible failure and configuration change. Try to
+ * read from the chosen source.
*/
lastSourceFailed = false;
+ pendingWalRcvRestart = false;
switch (currentSource)
{
@@ -12096,6 +12116,45 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
return false; /* not reached */
}
+/*
+ * Re-read config file and plan to restart running walreceiver if
+ * connection settings was changed.
+ */
+void
+ProcessStartupSigHup(void)
+{
+ char *conninfo = pstrdup(PrimaryConnInfo);
+ char *slotname = pstrdup(PrimarySlotName);
+ bool conninfoChanged;
+ bool slotnameChanged;
+
+ ProcessConfigFile(PGC_SIGHUP);
+
+ /*
+ * We need restart walreceiver if replication settings was changed.
+ */
+ conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+ slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+ if ((conninfoChanged || slotnameChanged) &&
+ currentSource == XLOG_FROM_STREAM
+ && WalRcvRunning())
+ {
+ if (conninfoChanged && slotnameChanged)
+ ereport(LOG,
+ (errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+ else
+ ereport(LOG,
+ (errmsg("The WAL receiver is going to be restarted due to change of %s",
+ conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+ pendingWalRcvRestart = true;
+ }
+
+ pfree(conninfo);
+ pfree(slotname);
+}
+
/*
* Determine what log level should be used to report a corrupt WAL record
* in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..62492daf6b 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -142,12 +142,12 @@ void
HandleStartupProcInterrupts(void)
{
/*
- * Check if we were requested to re-read config file.
+ * Process any requests or signals received recently.
*/
if (got_SIGHUP)
{
got_SIGHUP = false;
- ProcessConfigFile(PGC_SIGHUP);
+ ProcessStartupSigHup();
}
/*
diff --git a/src/backend/replication/walreceiver.c b/src/backend/replication/walreceiver.c
index 6abc780778..59834b9df8 100644
--- a/src/backend/replication/walreceiver.c
+++ b/src/backend/replication/walreceiver.c
@@ -644,7 +644,11 @@ WalRcvWaitForStartPosition(XLogRecPtr *startpoint, TimeLineID *startpointTLI)
walrcv->walRcvState == WALRCV_STOPPING);
if (walrcv->walRcvState == WALRCV_RESTARTING)
{
- /* we don't expect primary_conninfo to change */
+ /*
+ * We don't need handle changes of primary_conninfo or
+ * primary_slotname here. Startup process will shutdown running
+ * walreceiver in this case.
+ */
*startpoint = walrcv->receiveStart;
*startpointTLI = walrcv->receiveStartTLI;
walrcv->walRcvState = WALRCV_STREAMING;
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 90ffd89339..5ffe593517 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3558,7 +3558,7 @@ static struct config_string ConfigureNamesString[] =
},
{
- {"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+ {"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
gettext_noop("Sets the connection string to be used to connect to the sending server."),
NULL,
GUC_SUPERUSER_ONLY
@@ -3569,7 +3569,7 @@ static struct config_string ConfigureNamesString[] =
},
{
- {"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+ {"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
gettext_noop("Sets the name of the replication slot to use on the sending server."),
NULL
},
diff --git a/src/backend/utils/misc/postgresql.conf.sample b/src/backend/utils/misc/postgresql.conf.sample
index 0fc23e3a61..c25e406b9e 100644
--- a/src/backend/utils/misc/postgresql.conf.sample
+++ b/src/backend/utils/misc/postgresql.conf.sample
@@ -308,9 +308,7 @@
# These settings are ignored on a master server.
#primary_conninfo = '' # connection string to sending server
- # (change requires restart)
#primary_slot_name = '' # replication slot on sending server
- # (change requires restart)
#promote_trigger_file = '' # file name whose presence ends recovery
#hot_standby = on # "off" disallows queries during recovery
# (change requires restart)
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
extern void XLogRequestWalReceiverReply(void);
+extern void ProcessStartupSigHup(void);
extern void assign_max_wal_size(int newval, void *extra);
extern void assign_checkpoint_completion_target(double newval, void *extra);
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
# Initialize master node
my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
"primary_slot_name = $slotname_2");
$node_standby_2->append_conf('postgresql.conf',
"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
# Fetch xmin columns from slot's pg_replication_slots row, after waiting for
# given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
is($catalog_xmin, '',
'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+ "primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+ $node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay master value $newval");