On Wed, Jan 2, 2019, at 11:02, Petr Jelinek wrote:
> The patch generally looks good, but the documentation of max_wal_senders > needs updating. In config.sgml we say: > > > WAL sender processes count towards the total number > > of connections, so this parameter's value must be less than > > <xref linkend="guc-max-connections"/> minus > > <xref linkend="guc-superuser-reserved-connections"/>. > > This is now misleading. Thank you. Attached the new version(called it v8) with the following changes to the documentation: diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e94b305add..6634ce8cdc 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -697,8 +697,7 @@ include_dir 'conf.d' <para> The default value is three connections. The value must be less - than <varname>max_connections</varname> minus - <xref linkend="guc-max-wal-senders"/>. + than <varname>max_connections</varname>. This parameter can only be set at server start. </para> </listitem> @@ -3453,24 +3452,25 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows </term> <listitem> <para> - Specifies the maximum number of concurrent connections from - standby servers or streaming base backup clients (i.e., the - maximum number of simultaneously running WAL sender - processes). The default is 10. The value 0 means replication is - disabled. WAL sender processes count towards the total number - of connections, so this parameter's value must be less than - <xref linkend="guc-max-connections"/> minus - <xref linkend="guc-superuser-reserved-connections"/>. - Abrupt streaming client disconnection might leave an orphaned - connection slot behind until - a timeout is reached, so this parameter should be set slightly - higher than the maximum number of expected clients so disconnected - clients can immediately reconnect. This parameter can only - be set at server start. + Specifies the maximum number of concurrent connections from standby + servers or streaming base backup clients (i.e., the maximum number of + simultaneously running WAL sender processes). The default is 10. The + value 0 means replication is disabled. Abrupt streaming client + disconnection might leave an orphaned connection slot behind until a + timeout is reached, so this parameter should be set slightly higher + than the maximum number of expected clients so disconnected clients + can immediately reconnect. This parameter can only be set at server + start. Also, <varname>wal_level</varname> must be set to <literal>replica</literal> or higher to allow connections from standby servers. </para> + + <para> + When running a standby server, you must set this parameter to the + same or higher value than on the master server. Otherwise, queries + will not be allowed in the standby server. + </para> </listitem> </varlistentry> Cheers, Oleksii
diff --git a/doc/src/sgml/config.sgml b/doc/src/sgml/config.sgml index e94b305add..6634ce8cdc 100644 --- a/doc/src/sgml/config.sgml +++ b/doc/src/sgml/config.sgml @@ -697,8 +697,7 @@ include_dir 'conf.d' <para> The default value is three connections. The value must be less - than <varname>max_connections</varname> minus - <xref linkend="guc-max-wal-senders"/>. + than <varname>max_connections</varname>. This parameter can only be set at server start. </para> </listitem> @@ -3453,24 +3452,25 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows </term> <listitem> <para> - Specifies the maximum number of concurrent connections from - standby servers or streaming base backup clients (i.e., the - maximum number of simultaneously running WAL sender - processes). The default is 10. The value 0 means replication is - disabled. WAL sender processes count towards the total number - of connections, so this parameter's value must be less than - <xref linkend="guc-max-connections"/> minus - <xref linkend="guc-superuser-reserved-connections"/>. - Abrupt streaming client disconnection might leave an orphaned - connection slot behind until - a timeout is reached, so this parameter should be set slightly - higher than the maximum number of expected clients so disconnected - clients can immediately reconnect. This parameter can only - be set at server start. + Specifies the maximum number of concurrent connections from standby + servers or streaming base backup clients (i.e., the maximum number of + simultaneously running WAL sender processes). The default is 10. The + value 0 means replication is disabled. Abrupt streaming client + disconnection might leave an orphaned connection slot behind until a + timeout is reached, so this parameter should be set slightly higher + than the maximum number of expected clients so disconnected clients + can immediately reconnect. This parameter can only be set at server + start. Also, <varname>wal_level</varname> must be set to <literal>replica</literal> or higher to allow connections from standby servers. </para> + + <para> + When running a standby server, you must set this parameter to the + same or higher value than on the master server. Otherwise, queries + will not be allowed in the standby server. + </para> </listitem> </varlistentry> diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml index d8fd195da0..36ea513273 100644 --- a/doc/src/sgml/high-availability.sgml +++ b/doc/src/sgml/high-availability.sgml @@ -2172,6 +2172,11 @@ LOG: database system is ready to accept read only connections <varname>max_locks_per_transaction</varname> </para> </listitem> + <listitem> + <para> + <varname>max_wal_senders</varname> + </para> + </listitem> <listitem> <para> <varname>max_worker_processes</varname> diff --git a/doc/src/sgml/runtime.sgml b/doc/src/sgml/runtime.sgml index 8d9d40664b..18665895cb 100644 --- a/doc/src/sgml/runtime.sgml +++ b/doc/src/sgml/runtime.sgml @@ -717,13 +717,13 @@ psql: could not connect to server: No such file or directory <row> <entry><varname>SEMMNI</varname></entry> <entry>Maximum number of semaphore identifiers (i.e., sets)</entry> - <entry>at least <literal>ceil((max_connections + autovacuum_max_workers + max_worker_processes + 5) / 16)</literal> plus room for other applications</entry> + <entry>at least <literal>ceil((max_connections + autovacuum_max_workers + wax_wal_senders + max_worker_processes + 5) / 16)</literal> plus room for other applications</entry> </row> <row> <entry><varname>SEMMNS</varname></entry> <entry>Maximum number of semaphores system-wide</entry> - <entry><literal>ceil((max_connections + autovacuum_max_workers + max_worker_processes + 5) / 16) * 17</literal> plus room for other applications</entry> + <entry><literal>ceil((max_connections + autovacuum_max_workers + max_wal_senders + max_worker_processes + 5) / 16) * 17</literal> plus room for other applications</entry> </row> <row> @@ -780,13 +780,13 @@ psql: could not connect to server: No such file or directory other applications. The maximum number of semaphores in the system is set by <varname>SEMMNS</varname>, which consequently must be at least as high as <varname>max_connections</varname> plus - <varname>autovacuum_max_workers</varname> plus <varname>max_worker_processes</varname>, - plus one extra for each 16 + <varname>autovacuum_max_workers</varname> plus <varname>max_wal_senders</varname>, + plus <varname>max_worker_processes</varname>, plus one extra for each 16 allowed connections plus workers (see the formula in <xref linkend="sysvipc-parameters"/>). The parameter <varname>SEMMNI</varname> determines the limit on the number of semaphore sets that can exist on the system at one time. Hence this parameter must be at - least <literal>ceil((max_connections + autovacuum_max_workers + max_worker_processes + 5) / 16)</literal>. + least <literal>ceil((max_connections + autovacuum_max_workers + max_wal_senders + max_worker_processes + 5) / 16)</literal>. Lowering the number of allowed connections is a temporary workaround for failures, which are usually confusingly worded <quote>No space diff --git a/src/backend/access/rmgrdesc/xlogdesc.c b/src/backend/access/rmgrdesc/xlogdesc.c index 00741c7b09..1a304f19b8 100644 --- a/src/backend/access/rmgrdesc/xlogdesc.c +++ b/src/backend/access/rmgrdesc/xlogdesc.c @@ -109,11 +109,12 @@ xlog_desc(StringInfo buf, XLogReaderState *record) } } - appendStringInfo(buf, "max_connections=%d max_worker_processes=%d " - "max_prepared_xacts=%d max_locks_per_xact=%d " - "wal_level=%s wal_log_hints=%s " - "track_commit_timestamp=%s", + appendStringInfo(buf, "max_connections=%d max_wal_senders=%d " + "max_worker_processes=%d max_prepared_xacts=%d " + "max_locks_per_xact=%d wal_level=%s " + "wal_log_hints=%s track_commit_timestamp=%s", xlrec.MaxConnections, + xlrec.max_wal_senders, xlrec.max_worker_processes, xlrec.max_prepared_xacts, xlrec.max_locks_per_xact, diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 26b4977acb..7c5caee145 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -5256,6 +5256,7 @@ BootStrapXLOG(void) /* Set important parameter values for use when replaying WAL */ ControlFile->MaxConnections = MaxConnections; + ControlFile->max_wal_senders = max_wal_senders; ControlFile->max_worker_processes = max_worker_processes; ControlFile->max_prepared_xacts = max_prepared_xacts; ControlFile->max_locks_per_xact = max_locks_per_xact; @@ -6167,6 +6168,9 @@ CheckRequiredParameterValues(void) RecoveryRequiresIntParameter("max_connections", MaxConnections, ControlFile->MaxConnections); + RecoveryRequiresIntParameter("max_wal_senders", + max_wal_senders, + ControlFile->max_wal_senders); RecoveryRequiresIntParameter("max_worker_processes", max_worker_processes, ControlFile->max_worker_processes); @@ -9459,6 +9463,7 @@ XLogReportParameters(void) if (wal_level != ControlFile->wal_level || wal_log_hints != ControlFile->wal_log_hints || MaxConnections != ControlFile->MaxConnections || + max_wal_senders != ControlFile->max_wal_senders || max_worker_processes != ControlFile->max_worker_processes || max_prepared_xacts != ControlFile->max_prepared_xacts || max_locks_per_xact != ControlFile->max_locks_per_xact || @@ -9477,6 +9482,7 @@ XLogReportParameters(void) XLogRecPtr recptr; xlrec.MaxConnections = MaxConnections; + xlrec.max_wal_senders = max_wal_senders; xlrec.max_worker_processes = max_worker_processes; xlrec.max_prepared_xacts = max_prepared_xacts; xlrec.max_locks_per_xact = max_locks_per_xact; @@ -9492,6 +9498,7 @@ XLogReportParameters(void) } ControlFile->MaxConnections = MaxConnections; + ControlFile->max_wal_senders = max_wal_senders; ControlFile->max_worker_processes = max_worker_processes; ControlFile->max_prepared_xacts = max_prepared_xacts; ControlFile->max_locks_per_xact = max_locks_per_xact; @@ -9895,6 +9902,7 @@ xlog_redo(XLogReaderState *record) LWLockAcquire(ControlFileLock, LW_EXCLUSIVE); ControlFile->MaxConnections = xlrec.MaxConnections; + ControlFile->max_wal_senders = xlrec.max_wal_senders; ControlFile->max_worker_processes = xlrec.max_worker_processes; ControlFile->max_prepared_xacts = xlrec.max_prepared_xacts; ControlFile->max_locks_per_xact = xlrec.max_locks_per_xact; diff --git a/src/backend/postmaster/postmaster.c b/src/backend/postmaster/postmaster.c index 406cc2cf2d..93e039425c 100644 --- a/src/backend/postmaster/postmaster.c +++ b/src/backend/postmaster/postmaster.c @@ -885,11 +885,11 @@ PostmasterMain(int argc, char *argv[]) /* * Check for invalid combinations of GUC settings. */ - if (ReservedBackends + max_wal_senders >= MaxConnections) + if (ReservedBackends >= MaxConnections) { - write_stderr("%s: superuser_reserved_connections (%d) plus max_wal_senders (%d) must be less than max_connections (%d)\n", + write_stderr("%s: superuser_reserved_connections (%d) must be less than max_connections (%d)\n", progname, - ReservedBackends, max_wal_senders, MaxConnections); + ReservedBackends, MaxConnections); ExitPostmaster(1); } if (XLogArchiveMode > ARCHIVE_MODE_OFF && wal_level == WAL_LEVEL_MINIMAL) @@ -5528,7 +5528,7 @@ int MaxLivePostmasterChildren(void) { return 2 * (MaxConnections + autovacuum_max_workers + 1 + - max_worker_processes); + max_wal_senders + max_worker_processes); } /* diff --git a/src/backend/replication/walsender.c b/src/backend/replication/walsender.c index d1a8113cb6..df073673f5 100644 --- a/src/backend/replication/walsender.c +++ b/src/backend/replication/walsender.c @@ -2273,8 +2273,8 @@ InitWalSenderSlot(void) Assert(MyWalSnd == NULL); /* - * Find a free walsender slot and reserve it. If this fails, we must be - * out of WalSnd structures. + * Find a free walsender slot and reserve it. This must not fail due + * to the prior check for free walsenders at InitProcess. */ for (i = 0; i < max_wal_senders; i++) { @@ -2310,13 +2310,7 @@ InitWalSenderSlot(void) break; } } - if (MyWalSnd == NULL) - ereport(FATAL, - (errcode(ERRCODE_TOO_MANY_CONNECTIONS), - errmsg("number of requested standby connections " - "exceeds max_wal_senders (currently %d)", - max_wal_senders))); - + Assert(MyWalSnd != NULL); /* Arrange to clean up at walsender exit */ on_shmem_exit(WalSndKill, 0); } diff --git a/src/backend/storage/lmgr/proc.c b/src/backend/storage/lmgr/proc.c index 33387fb71b..fe33fc42e3 100644 --- a/src/backend/storage/lmgr/proc.c +++ b/src/backend/storage/lmgr/proc.c @@ -43,6 +43,7 @@ #include "postmaster/autovacuum.h" #include "replication/slot.h" #include "replication/syncrep.h" +#include "replication/walsender.h" #include "storage/condition_variable.h" #include "storage/standby.h" #include "storage/ipc.h" @@ -147,8 +148,9 @@ ProcGlobalSemas(void) * running out when trying to start another backend is a common failure. * So, now we grab enough semaphores to support the desired max number * of backends immediately at initialization --- if the sysadmin has set - * MaxConnections, max_worker_processes, or autovacuum_max_workers higher - * than his kernel will support, he'll find out sooner rather than later. + * MaxConnections, max_wal_senders, max_worker_processes, or + * autovacuum_max_workers higher than his kernel will support, he'll + * find out sooner rather than later. * * Another reason for creating semaphores here is that the semaphore * implementation typically requires us to create semaphores in the @@ -180,6 +182,7 @@ InitProcGlobal(void) ProcGlobal->freeProcs = NULL; ProcGlobal->autovacFreeProcs = NULL; ProcGlobal->bgworkerFreeProcs = NULL; + ProcGlobal->walsenderFreeProcs = NULL; ProcGlobal->startupProc = NULL; ProcGlobal->startupProcPid = 0; ProcGlobal->startupBufferPinWaitBufId = -1; @@ -253,13 +256,20 @@ InitProcGlobal(void) ProcGlobal->autovacFreeProcs = &procs[i]; procs[i].procgloballist = &ProcGlobal->autovacFreeProcs; } - else if (i < MaxBackends) + else if (i < MaxBackends - max_wal_senders) { /* PGPROC for bgworker, add to bgworkerFreeProcs list */ procs[i].links.next = (SHM_QUEUE *) ProcGlobal->bgworkerFreeProcs; ProcGlobal->bgworkerFreeProcs = &procs[i]; procs[i].procgloballist = &ProcGlobal->bgworkerFreeProcs; } + else if (i < MaxBackends) + { + /* PGPROC for walsender, add to walsenderFreeProcs list */ + procs[i].links.next = (SHM_QUEUE *) ProcGlobal->walsenderFreeProcs; + ProcGlobal->walsenderFreeProcs = &procs[i]; + procs[i].procgloballist = &ProcGlobal->walsenderFreeProcs; + } /* Initialize myProcLocks[] shared memory queues. */ for (j = 0; j < NUM_LOCK_PARTITIONS; j++) @@ -311,6 +321,8 @@ InitProcess(void) procgloballist = &ProcGlobal->autovacFreeProcs; else if (IsBackgroundWorker) procgloballist = &ProcGlobal->bgworkerFreeProcs; + else if (am_walsender) + procgloballist = &ProcGlobal->walsenderFreeProcs; else procgloballist = &ProcGlobal->freeProcs; @@ -341,6 +353,12 @@ InitProcess(void) * in the autovacuum case? */ SpinLockRelease(ProcStructLock); + if (am_walsender) + ereport(FATAL, + (errcode(ERRCODE_TOO_MANY_CONNECTIONS), + errmsg("number of requested standby connections " + "exceeds max_wal_senders (currently %d)", + max_wal_senders))); ereport(FATAL, (errcode(ERRCODE_TOO_MANY_CONNECTIONS), errmsg("sorry, too many clients already"))); diff --git a/src/backend/utils/init/postinit.c b/src/backend/utils/init/postinit.c index b636b1e262..44e27866df 100644 --- a/src/backend/utils/init/postinit.c +++ b/src/backend/utils/init/postinit.c @@ -527,7 +527,7 @@ InitializeMaxBackends(void) /* the extra unit accounts for the autovacuum launcher */ MaxBackends = MaxConnections + autovacuum_max_workers + 1 + - max_worker_processes; + max_worker_processes + max_wal_senders; /* internal error because the values were all checked previously */ if (MaxBackends > MAX_BACKENDS) @@ -811,12 +811,9 @@ InitPostgres(const char *in_dbname, Oid dboid, const char *username, } /* - * The last few connection slots are reserved for superusers. Although - * replication connections currently require superuser privileges, we - * don't allow them to consume the reserved slots, which are intended for - * interactive use. + * The last few connection slots are reserved for superusers. */ - if ((!am_superuser || am_walsender) && + if ((!am_superuser && !am_walsender) && ReservedBackends > 0 && !HaveNFreeProcs(ReservedBackends)) ereport(FATAL, diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 6fe1939881..3255f74ea3 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -2054,7 +2054,7 @@ static struct config_int ConfigureNamesInt[] = }, { - /* see max_connections and max_wal_senders */ + /* see max_connections */ {"superuser_reserved_connections", PGC_POSTMASTER, CONN_AUTH_SETTINGS, gettext_noop("Sets the number of connection slots reserved for superusers."), NULL @@ -2572,7 +2572,6 @@ static struct config_int ConfigureNamesInt[] = }, { - /* see max_connections and superuser_reserved_connections */ {"max_wal_senders", PGC_POSTMASTER, REPLICATION_SENDING, gettext_noop("Sets the maximum number of simultaneously running WAL sender processes."), NULL diff --git a/src/bin/pg_controldata/pg_controldata.c b/src/bin/pg_controldata/pg_controldata.c index 895a51f89d..23c90f2358 100644 --- a/src/bin/pg_controldata/pg_controldata.c +++ b/src/bin/pg_controldata/pg_controldata.c @@ -302,6 +302,8 @@ main(int argc, char *argv[]) ControlFile->wal_log_hints ? _("on") : _("off")); printf(_("max_connections setting: %d\n"), ControlFile->MaxConnections); + printf(_("max_wal_senders setting: %d\n"), + ControlFile->max_wal_senders); printf(_("max_worker_processes setting: %d\n"), ControlFile->max_worker_processes); printf(_("max_prepared_xacts setting: %d\n"), diff --git a/src/bin/pg_resetwal/pg_resetwal.c b/src/bin/pg_resetwal/pg_resetwal.c index 6fb403a5a8..299818d2ac 100644 --- a/src/bin/pg_resetwal/pg_resetwal.c +++ b/src/bin/pg_resetwal/pg_resetwal.c @@ -728,6 +728,7 @@ GuessControlValues(void) ControlFile.wal_log_hints = false; ControlFile.track_commit_timestamp = false; ControlFile.MaxConnections = 100; + ControlFile.max_wal_senders = 10; ControlFile.max_worker_processes = 8; ControlFile.max_prepared_xacts = 0; ControlFile.max_locks_per_xact = 64; @@ -955,6 +956,7 @@ RewriteControlFile(void) ControlFile.wal_log_hints = false; ControlFile.track_commit_timestamp = false; ControlFile.MaxConnections = 100; + ControlFile.max_wal_senders = 10; ControlFile.max_worker_processes = 8; ControlFile.max_prepared_xacts = 0; ControlFile.max_locks_per_xact = 64; diff --git a/src/include/access/xlog_internal.h b/src/include/access/xlog_internal.h index 30610b3ea9..bd96c0f29a 100644 --- a/src/include/access/xlog_internal.h +++ b/src/include/access/xlog_internal.h @@ -225,6 +225,7 @@ typedef XLogLongPageHeaderData *XLogLongPageHeader; typedef struct xl_parameter_change { int MaxConnections; + int max_wal_senders; int max_worker_processes; int max_prepared_xacts; int max_locks_per_xact; diff --git a/src/include/catalog/pg_control.h b/src/include/catalog/pg_control.h index 773d9e6eba..683645dff2 100644 --- a/src/include/catalog/pg_control.h +++ b/src/include/catalog/pg_control.h @@ -176,6 +176,7 @@ typedef struct ControlFileData int wal_level; bool wal_log_hints; int MaxConnections; + int max_wal_senders; int max_worker_processes; int max_prepared_xacts; int max_locks_per_xact; diff --git a/src/include/storage/proc.h b/src/include/storage/proc.h index cb613c8076..e89f7337b4 100644 --- a/src/include/storage/proc.h +++ b/src/include/storage/proc.h @@ -255,6 +255,8 @@ typedef struct PROC_HDR PGPROC *autovacFreeProcs; /* Head of list of bgworker free PGPROC structures */ PGPROC *bgworkerFreeProcs; + /* Head of list of walsender free PGPROC structures */ + PGPROC *walsenderFreeProcs; /* First pgproc waiting for group XID clear */ pg_atomic_uint32 procArrayGroupFirst; /* First pgproc waiting for group transaction status update */