Hello
Updated version attached. The merge conflict was about the test count in
001_stream_rep.pl; nothing else was changed. My approach may still be
incorrect, so any redesign ideas are welcome. Thanks in advance!
regards, Sergei
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml
index 84341a30e5..054be17e08 100644
--- a/doc/src/sgml/config.sgml
+++ b/doc/src/sgml/config.sgml
@@ -3916,9 +3916,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
<varname>primary_conninfo</varname> string.
</para>
<para>
- This parameter can only be set at server start.
+ This parameter can only be set in the <filename>postgresql.conf</filename>
+ file or on the server command line.
This setting has no effect if the server is not in standby mode.
</para>
+ <para>
+ The WAL receiver will be restarted after <varname>primary_conninfo</varname>
+ is changed.
+ </para>
</listitem>
</varlistentry>
<varlistentry id="guc-primary-slot-name" xreflabel="primary_slot_name">
@@ -3933,9 +3938,14 @@ ANY <replaceable class="parameter">num_sync</replaceable> ( <replaceable class="
connecting to the sending server via streaming replication to control
resource removal on the upstream node
(see <xref linkend="streaming-replication-slots"/>).
- This parameter can only be set at server start.
+ This parameter can only be set in the <filename>postgresql.conf</filename>
+ file or on the server command line.
This setting has no effect if <varname>primary_conninfo</varname> is not
- set.
+ set or the server is not in standby mode.
+ </para>
+ <para>
+ The WAL receiver will be restarted after <varname>primary_slot_name</varname>
+ is changed.
</para>
</listitem>
</varlistentry>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 3e2c4e3e5b..964989432c 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -12125,6 +12125,42 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
return false; /* not reached */
}
+void
+ProcessStartupSigHup(void)
+{
+ char *conninfo = pstrdup(PrimaryConnInfo);
+ char *slotname = pstrdup(PrimarySlotName);
+ bool conninfoChanged;
+ bool slotnameChanged;
+
+ ProcessConfigFile(PGC_SIGHUP);
+
+ /*
+ * If the reload changed primary_conninfo or primary_slot_name while we are
+ * streaming with a running walreceiver, flag the source for a restart.
+ */
+ conninfoChanged = (strcmp(conninfo, PrimaryConnInfo) != 0);
+ slotnameChanged = (strcmp(slotname, PrimarySlotName) != 0);
+
+ if ((conninfoChanged || slotnameChanged) &&
+ currentSource == XLOG_FROM_STREAM
+ && WalRcvRunning())
+ {
+ if (conninfoChanged && slotnameChanged)
+ ereport(LOG,
+ (errmsg("The WAL receiver is going to be restarted due to change of primary_conninfo and primary_slot_name")));
+ else
+ ereport(LOG,
+ (errmsg("The WAL receiver is going to be restarted due to change of %s",
+ conninfoChanged ? "primary_conninfo" : "primary_slot_name")));
+
+ pendingRestartSource = true;
+ }
+
+ pfree(conninfo);
+ pfree(slotname);
+}
+
/*
* Determine what log level should be used to report a corrupt WAL record
* in the current WAL page, previously read by XLogPageRead().
diff --git a/src/backend/postmaster/startup.c b/src/backend/postmaster/startup.c
index 5048a2c2aa..9bf5c792fe 100644
--- a/src/backend/postmaster/startup.c
+++ b/src/backend/postmaster/startup.c
@@ -147,7 +147,7 @@ HandleStartupProcInterrupts(void)
if (got_SIGHUP)
{
got_SIGHUP = false;
- ProcessConfigFile(PGC_SIGHUP);
+ ProcessStartupSigHup();
}
/*
diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c
index 92c4fee8f8..e54d8e7172 100644
--- a/src/backend/utils/misc/guc.c
+++ b/src/backend/utils/misc/guc.c
@@ -3571,7 +3571,7 @@ static struct config_string ConfigureNamesString[] =
},
{
- {"primary_conninfo", PGC_POSTMASTER, REPLICATION_STANDBY,
+ {"primary_conninfo", PGC_SIGHUP, REPLICATION_STANDBY,
gettext_noop("Sets the connection string to be used to connect to the sending server."),
NULL,
GUC_SUPERUSER_ONLY
@@ -3582,7 +3582,7 @@ static struct config_string ConfigureNamesString[] =
},
{
- {"primary_slot_name", PGC_POSTMASTER, REPLICATION_STANDBY,
+ {"primary_slot_name", PGC_SIGHUP, REPLICATION_STANDBY,
gettext_noop("Sets the name of the replication slot to use on the sending server."),
NULL
},
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index d519252aad..9e49020b19 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -320,6 +320,7 @@ extern void SetWalWriterSleeping(bool sleeping);
extern void XLogRequestWalReceiverReply(void);
+extern void ProcessStartupSigHup(void);
extern void assign_max_wal_size(int newval, void *extra);
extern void assign_checkpoint_completion_target(double newval, void *extra);
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
index 3c743d7d7c..ae80f4df3a 100644
--- a/src/test/recovery/t/001_stream_rep.pl
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -3,7 +3,7 @@ use strict;
use warnings;
use PostgresNode;
use TestLib;
-use Test::More tests => 32;
+use Test::More tests => 33;
# Initialize master node
my $node_master = get_new_node('master');
@@ -208,7 +208,9 @@ $node_standby_2->append_conf('postgresql.conf',
"primary_slot_name = $slotname_2");
$node_standby_2->append_conf('postgresql.conf',
"wal_receiver_status_interval = 1");
-$node_standby_2->restart;
+# We should be able to change primary_slot_name without a restart;
+# get_slot_xmins below waits for the change to take effect.
+$node_standby_2->reload;
# Fetch xmin columns from slot's pg_replication_slots row, after waiting for
# given boolean condition to be true to ensure we've reached a quiescent state
@@ -344,3 +346,21 @@ is($catalog_xmin, '',
is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
is($catalog_xmin, '',
'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf',
+ "primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# Stop the cascading standby to be sure standby_2 is not streaming from it
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+ $node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 replayed master value $newval");
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 13e0d2366f..3e2c4e3e5b 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -798,10 +798,11 @@ static XLogSource readSource = 0; /* XLOG_FROM_* code */
* different from readSource in that this is always set, even when we don't
* currently have a WAL file open. If lastSourceFailed is set, our last
* attempt to read from currentSource failed, and we should try another source
- * next.
+ * next. If pendingRestartSource is set, we want to restart the current source.
*/
static XLogSource currentSource = 0; /* XLOG_FROM_* code */
static bool lastSourceFailed = false;
+static bool pendingRestartSource = false;
typedef struct XLogPageReadPrivate
{
@@ -11784,48 +11785,6 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
if (!StandbyMode)
return false;
- /*
- * If primary_conninfo is set, launch walreceiver to try
- * to stream the missing WAL.
- *
- * If fetching_ckpt is true, RecPtr points to the initial
- * checkpoint location. In that case, we use RedoStartLSN
- * as the streaming start position instead of RecPtr, so
- * that when we later jump backwards to start redo at
- * RedoStartLSN, we will have the logs streamed already.
- */
- if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
- {
- XLogRecPtr ptr;
- TimeLineID tli;
-
- if (fetching_ckpt)
- {
- ptr = RedoStartLSN;
- tli = ControlFile->checkPointCopy.ThisTimeLineID;
- }
- else
- {
- ptr = RecPtr;
-
- /*
- * Use the record begin position to determine the
- * TLI, rather than the position we're reading.
- */
- tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
-
- if (curFileTLI > 0 && tli < curFileTLI)
- elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
- (uint32) (tliRecPtr >> 32),
- (uint32) tliRecPtr,
- tli, curFileTLI);
- }
- curFileTLI = tli;
- RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
- PrimarySlotName);
- receivedUpto = 0;
- }
-
/*
* Move to XLOG_FROM_STREAM state in either case. We'll
* get immediate failure if we didn't launch walreceiver,
@@ -11925,10 +11884,83 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess,
lastSourceFailed ? "failure" : "success");
/*
- * We've now handled possible failure. Try to read from the chosen
- * source.
+ * Prepare to read from the chosen source if a source restart was requested
+ * or the last source failed.
+ */
+ if (pendingRestartSource || lastSourceFailed)
+ {
+ /*
+ * Make sure that walreceiver is not active. This is needed for
+ * all supported sources.
+ */
+ if (WalRcvRunning())
+ ShutdownWalRcv();
+
+ switch (currentSource)
+ {
+ case XLOG_FROM_ARCHIVE:
+ case XLOG_FROM_PG_WAL:
+
+ /*
+ * We do not need additional actions here
+ */
+ break;
+
+ case XLOG_FROM_STREAM:
+
+ /*
+ * If primary_conninfo is set, launch walreceiver to try
+ * to stream the missing WAL.
+ *
+ * If fetching_ckpt is true, RecPtr points to the initial
+ * checkpoint location. In that case, we use RedoStartLSN
+ * as the streaming start position instead of RecPtr, so
+ * that when we later jump backwards to start redo at
+ * RedoStartLSN, we will have the logs streamed already.
+ */
+ if (PrimaryConnInfo && strcmp(PrimaryConnInfo, "") != 0)
+ {
+ XLogRecPtr ptr;
+ TimeLineID tli;
+
+ if (fetching_ckpt)
+ {
+ ptr = RedoStartLSN;
+ tli = ControlFile->checkPointCopy.ThisTimeLineID;
+ }
+ else
+ {
+ ptr = RecPtr;
+
+ /*
+ * Use the record begin position to determine the
+ * TLI, rather than the position we're reading.
+ */
+ tli = tliOfPointInHistory(tliRecPtr, expectedTLEs);
+
+ if (curFileTLI > 0 && tli < curFileTLI)
+ elog(ERROR, "according to history file, WAL location %X/%X belongs to timeline %u, but previous recovered WAL file came from timeline %u",
+ (uint32) (tliRecPtr >> 32),
+ (uint32) tliRecPtr,
+ tli, curFileTLI);
+ }
+ curFileTLI = tli;
+ RequestXLogStreaming(tli, ptr, PrimaryConnInfo,
+ PrimarySlotName);
+ receivedUpto = 0;
+ }
+ break;
+ default:
+ elog(ERROR, "unexpected WAL source %d", currentSource);
+ }
+ }
+
+ /*
+ * We've now handled possible failure and pending source restart. Try
+ * to read from the chosen source.
*/
lastSourceFailed = false;
+ pendingRestartSource = false;
switch (currentSource)
{