Michael Paquier wrote:
> > diff --git a/src/backend/access/transam/xlog.c
> > b/src/backend/access/transam/xlog.c
> > index 7375a78ffc..3a1f49e83a 100644
> > --- a/src/backend/access/transam/xlog.c
> > +++ b/src/backend/access/transam/xlog.c
> > @@ -81,8 +81,6 @@ extern uint32 bootstrap_data_checksum_version;
> > /* File path names (all relative to $PGDATA) */
> > #define RECOVERY_COMMAND_FILE "recovery.conf"
> > #define RECOVERY_COMMAND_DONE "recovery.done"
> > -#define PROMOTE_SIGNAL_FILE "promote"
> > -#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
>
> Perhaps we could just move the whole set to xlog.h.
Done.
> > +Datum
> > +pg_promote(PG_FUNCTION_ARGS)
> > +{
> > + FILE *promote_file;
> > +
> > + if (!superuser())
> > + ereport(ERROR,
> > + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
> > + errmsg("must be superuser to promote standby
> > servers")));
>
> Let's remove this restriction at code level, and instead use
> function-level ACLs so as it is possible to allow non-superusers to
> trigger a promotion as well, say a dedicated role. We could add a
> system role for this purpose, but I am not sure that it is worth the
> addition yet.
Agreed, done.
> > + while ($self->safe_psql('postgres', "SELECT pg_is_in_recovery()") !=
> > 'f')
> > + {
> > + sleep 1;
> > + }
> > + return;
>
> This could go on infinitely, locking down buildfarm machines silently if
> a commit is not careful. What I would suggest to do instead is to not
> touch PostgresNode.pm at all, and to just modify one test to trigger
> promotion with the SQL function. Then from the test, you should add a
> comment that triggerring promotion from SQL is wanted instead of the
> internal routine, and then please add a call to poll_query_until() in
> the same way as what 6deb52b2 has removed.
I have modified recovery/t/004_timeline_switch.pl and recovery/t/009_twophase.pl
accordingly: one calls the function with "wait => true", the other
uses "wait => false" and waits as you suggested.
> As of now, this function returns true if promotion has been triggered,
> and false if not. However it seems to me that we should have something
> more consistent with "pg_ctl promote", so there are actually three
> possible states:
> 1) Node is in recovery, with promotion triggered. pg_promote() returns
> true in case of success in creating the trigger file.
> 2) Node is in recovery, with promotion triggered. pg_promote() returns
> false in case of failure in creating the trigger file.
> 3) Node is not in recovery, where I think a call to pg_promote should
> trigger an error. This is not handled in the patch.
So far I had returned "false" in the last case, but I am fine with
throwing an error in that case. Done.
> An other thing which has value is to implement a "wait" mode for this
> feature, or actually a nowait mode. You could simply do what pg_ctl
> does with its logic in get_control_dbstate() by looking at the control
> file state. I think that waiting for the promotion to happen should be
> the default.
I have implemented that.
> At the end this patch needs a bit more work.
Yes, it did. Thanks for the thorough review!
Yours,
Laurenz Albe
From 8e45dc7dfbb76eb1625323e3cd8f161779973528 Mon Sep 17 00:00:00 2001
From: Laurenz Albe <[email protected]>
Date: Mon, 15 Oct 2018 16:07:56 +0200
Subject: [PATCH] Add pg_promote() to promote standby servers
---
doc/src/sgml/func.sgml | 21 +++++++
doc/src/sgml/high-availability.sgml | 2 +-
doc/src/sgml/recovery-config.sgml | 3 +-
src/backend/access/transam/xlog.c | 6 --
src/backend/access/transam/xlogfuncs.c | 66 ++++++++++++++++++++++
src/backend/catalog/system_views.sql | 7 +++
src/include/access/xlog.h | 6 ++
src/include/catalog/pg_proc.dat | 4 ++
src/test/recovery/t/004_timeline_switch.pl | 6 +-
src/test/recovery/t/009_twophase.pl | 6 +-
10 files changed, 116 insertions(+), 11 deletions(-)
diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index 9a7f683658..5ecf7f5b9d 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -18731,6 +18731,9 @@ SELECT set_config('log_statement_stats', 'off', false);
<indexterm>
<primary>pg_terminate_backend</primary>
</indexterm>
+ <indexterm>
+ <primary>pg_promote</primary>
+ </indexterm>
<indexterm>
<primary>signal</primary>
@@ -18790,6 +18793,16 @@ SELECT set_config('log_statement_stats', 'off', false);
however only superusers can terminate superuser backends.
</entry>
</row>
+ <row>
+ <entry>
+ <literal><function>pg_promote(<parameter>wait</parameter> <type>boolean</type> DEFAULT true)</function></literal>
+ </entry>
+ <entry><type>boolean</type></entry>
+ <entry>Promote a physical standby server. This function is restricted to
+ superusers by default, but other users can be granted EXECUTE to run
+ the function.
+ </entry>
+ </row>
</tbody>
</tgroup>
</table>
@@ -18827,6 +18840,14 @@ SELECT set_config('log_statement_stats', 'off', false);
subprocess.
</para>
+ <para>
+ <function>pg_promote</function> can only be called on standby servers.
+ If the argument <parameter>wait</parameter> is <literal>true</literal>,
+ the function waits until promotion is complete or 60 seconds have passed,
+ otherwise the function returns immediately after sending the promotion
+ signal to the postmaster.
+ </para>
+
</sect2>
<sect2 id="functions-admin-backup">
diff --git a/doc/src/sgml/high-availability.sgml b/doc/src/sgml/high-availability.sgml
index 6f57362df7..8ccd1ffcd1 100644
--- a/doc/src/sgml/high-availability.sgml
+++ b/doc/src/sgml/high-availability.sgml
@@ -1472,7 +1472,7 @@ synchronous_standby_names = 'ANY 2 (s1, s2, s3)'
<para>
To trigger failover of a log-shipping standby server,
- run <command>pg_ctl promote</command> or create a trigger
+ run <command>pg_ctl promote</command>, call <function>pg_promote()</function>, or create a trigger
file with the file name and path specified by the <varname>trigger_file</varname>
setting in <filename>recovery.conf</filename>. If you're planning to use
<command>pg_ctl promote</command> to fail over, <varname>trigger_file</varname> is
diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml
index 92825fdf19..d06cd0b08e 100644
--- a/doc/src/sgml/recovery-config.sgml
+++ b/doc/src/sgml/recovery-config.sgml
@@ -439,7 +439,8 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows
<para>
Specifies a trigger file whose presence ends recovery in the
standby. Even if this value is not set, you can still promote
- the standby using <command>pg_ctl promote</command>.
+ the standby using <command>pg_ctl promote</command> or calling
+ <function>pg_promote()</function>.
This setting has no effect if <varname>standby_mode</varname> is <literal>off</literal>.
</para>
</listitem>
diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c
index 7375a78ffc..62fc418893 100644
--- a/src/backend/access/transam/xlog.c
+++ b/src/backend/access/transam/xlog.c
@@ -78,12 +78,6 @@
extern uint32 bootstrap_data_checksum_version;
-/* File path names (all relative to $PGDATA) */
-#define RECOVERY_COMMAND_FILE "recovery.conf"
-#define RECOVERY_COMMAND_DONE "recovery.done"
-#define PROMOTE_SIGNAL_FILE "promote"
-#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
-
/* User-settable parameters */
int max_wal_size_mb = 1024; /* 1 GB */
diff --git a/src/backend/access/transam/xlogfuncs.c b/src/backend/access/transam/xlogfuncs.c
index 9731742978..7cd5db001e 100644
--- a/src/backend/access/transam/xlogfuncs.c
+++ b/src/backend/access/transam/xlogfuncs.c
@@ -35,6 +35,7 @@
#include "storage/fd.h"
#include "storage/ipc.h"
+#include <unistd.h>
/*
* Store label file and tablespace map during non-exclusive backups.
@@ -697,3 +698,68 @@ pg_backup_start_time(PG_FUNCTION_ARGS)
PG_RETURN_DATUM(xtime);
}
+
+#define WAIT_SECONDS 60
+#define WAITS_PER_SECOND 10
+
+/*
+ * Promote a standby server.
+ *
+ * A result of "true" means that promotion has been initiated.
+ */
+Datum
+pg_promote(PG_FUNCTION_ARGS)
+{
+ bool wait = PG_GETARG_BOOL(0);
+ FILE *promote_file;
+ int i;
+
+ if (!RecoveryInProgress())
+ ereport(ERROR,
+ (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
+ errmsg("recovery is not in progress"),
+ errhint("Only a server that is in recovery can be promoted.")));
+
+ /* create the promote signal file */
+ promote_file = AllocateFile(PROMOTE_SIGNAL_FILE, "w");
+ if (!promote_file)
+ {
+ ereport(WARNING,
+ (errmsg("could not create file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+ PG_RETURN_BOOL(false);
+ }
+
+ if (FreeFile(promote_file))
+ {
+ /* probably unreachable, but it is better to be safe */
+ ereport(WARNING,
+ (errmsg("could not write to file \"%s\": %m", PROMOTE_SIGNAL_FILE)));
+ PG_RETURN_BOOL(false);
+ }
+
+ /* signal the postmaster */
+ if (kill(PostmasterPid, SIGUSR1) != 0)
+ {
+ ereport(WARNING,
+ (errmsg("failed to send signal to postmaster: %m")));
+ (void) unlink(PROMOTE_SIGNAL_FILE);
+ PG_RETURN_BOOL(false);
+ }
+
+ /* return immediately if waiting was not requested */
+ if (wait)
+ PG_RETURN_BOOL(true);
+
+ /* wait for up to a minute for promotion */
+ for (i = 0; i < WAITS_PER_SECOND * WAIT_SECONDS; ++i)
+ {
+ if (!RecoveryInProgress())
+ PG_RETURN_BOOL(true);
+
+ pg_usleep(1000000L / WAITS_PER_SECOND);
+ }
+
+ ereport(WARNING,
+ (errmsg("server did not promote within %d seconds", WAIT_SECONDS)));
+ PG_RETURN_BOOL(false);
+}
diff --git a/src/backend/catalog/system_views.sql b/src/backend/catalog/system_views.sql
index 7251552419..9c9cf84c10 100644
--- a/src/backend/catalog/system_views.sql
+++ b/src/backend/catalog/system_views.sql
@@ -1119,6 +1119,13 @@ LANGUAGE INTERNAL
STRICT IMMUTABLE PARALLEL SAFE
AS 'jsonb_insert';
+CREATE OR REPLACE FUNCTION
+ pg_promote(wait boolean DEFAULT true)
+RETURNS boolean
+LANGUAGE INTERNAL
+STRICT VOLATILE
+AS 'pg_promote';
+
--
-- The default permissions for functions mean that anyone can execute them.
-- A number of functions shouldn't be executable by just anyone, but rather
diff --git a/src/include/access/xlog.h b/src/include/access/xlog.h
index 421ba6d775..e01d12eb7c 100644
--- a/src/include/access/xlog.h
+++ b/src/include/access/xlog.h
@@ -319,10 +319,16 @@ extern void do_pg_abort_backup(void);
extern SessionBackupState get_backup_status(void);
/* File path names (all relative to $PGDATA) */
+#define RECOVERY_COMMAND_FILE "recovery.conf"
+#define RECOVERY_COMMAND_DONE "recovery.done"
#define BACKUP_LABEL_FILE "backup_label"
#define BACKUP_LABEL_OLD "backup_label.old"
#define TABLESPACE_MAP "tablespace_map"
#define TABLESPACE_MAP_OLD "tablespace_map.old"
+/* files to signal promotion to primary */
+#define PROMOTE_SIGNAL_FILE "promote"
+#define FALLBACK_PROMOTE_SIGNAL_FILE "fallback_promote"
+
#endif /* XLOG_H */
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index 8e4145f42b..1598f930a4 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -5997,6 +5997,10 @@
proname => 'pg_backup_start_time', provolatile => 's',
prorettype => 'timestamptz', proargtypes => '',
prosrc => 'pg_backup_start_time' },
+{ oid => '3436', descr => 'promote standby server',
+ proname => 'pg_promote', provolatile => 'v',
+ prorettype => 'bool', proargtypes => 'bool', proargnames => '{wait}',
+ prosrc => 'pg_promote' },
{ oid => '2848', descr => 'switch to new wal file',
proname => 'pg_switch_wal', provolatile => 'v', prorettype => 'pg_lsn',
proargtypes => '', prosrc => 'pg_switch_wal' },
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
index 34ee335129..fd440ca9dd 100644
--- a/src/test/recovery/t/004_timeline_switch.pl
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -37,9 +37,11 @@ $node_master->safe_psql('postgres',
$node_master->wait_for_catchup($node_standby_1, 'replay',
$node_master->lsn('write'));
-# Stop and remove master, and promote standby 1, switching it to a new timeline
+# Stop and remove master
$node_master->teardown_node;
-$node_standby_1->promote;
+
+# promote standby 1 using "pg_promote", switching it to a new timeline
+$node_standby_1->safe_psql('postgres', "SELECT pg_promote()");
# Switch standby 2 to replay from standby 1
rmtree($node_standby_2->data_dir . '/recovery.conf');
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
index 9ea3bd65fc..a6044179bc 100644
--- a/src/test/recovery/t/009_twophase.pl
+++ b/src/test/recovery/t/009_twophase.pl
@@ -214,7 +214,11 @@ $cur_master->psql(
INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_master_name}');
PREPARE TRANSACTION 'xact_009_10';");
$cur_master->teardown_node;
-$cur_standby->promote;
+
+# promote standby using "pg_promote" and wait until it is promoted
+$cur_standby->safe_psql('postgres', 'SELECT pg_promote(FALSE)');
+$cur_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()")
+ or die "standby never exited recovery";
# change roles
note "Now paris is master and london is standby";
--
2.17.2