On Thu, Feb 5, 2015 at 11:58 PM, Michael Paquier wrote: > An updated patch is attached. I just noticed that the patch I sent was incorrect: - Parameter name was still wal_availability_check_interval and not wal_retrieve_retry_interval - Documentation was incorrect. Please use the patch attached instead for further review. -- Michael
From 06a4d3d1f5fe4362d7be1404cbf0b45b74fea69f Mon Sep 17 00:00:00 2001 From: Michael Paquier <mich...@otacoo.com> Date: Mon, 19 Jan 2015 16:08:48 +0900 Subject: [PATCH] Add wal_retrieve_retry_interval
This parameter controls how often WAL availability is rechecked when a node is in recovery, particularly after successive failures to fetch WAL from the archive or to receive WAL records from a streaming source. The default value is 5s. --- doc/src/sgml/recovery-config.sgml | 17 +++++++++ src/backend/access/transam/recovery.conf.sample | 9 +++++ src/backend/access/transam/xlog.c | 47 +++++++++++++++++-------- src/backend/utils/adt/timestamp.c | 38 ++++++++++++++++++++ src/include/utils/timestamp.h | 2 ++ 5 files changed, 99 insertions(+), 14 deletions(-) diff --git a/doc/src/sgml/recovery-config.sgml b/doc/src/sgml/recovery-config.sgml index 0c64ff2..d4babbd 100644 --- a/doc/src/sgml/recovery-config.sgml +++ b/doc/src/sgml/recovery-config.sgml @@ -145,6 +145,23 @@ restore_command = 'copy "C:\\server\\archivedir\\%f" "%p"' # Windows </listitem> </varlistentry> + <varlistentry id="wal-retrieve-retry-interval" xreflabel="wal_retrieve_retry_interval"> + <term><varname>wal_retrieve_retry_interval</varname> (<type>integer</type>) + <indexterm> + <primary><varname>wal_retrieve_retry_interval</> recovery parameter</primary> + </indexterm> + </term> + <listitem> + <para> + This parameter specifies the amount of time to wait when a failure + occurred when reading WAL from a source (be it via streaming + replication, local <filename>pg_xlog</> or WAL archive) for a node + in standby mode, or when WAL is expected from a source. Default + value is <literal>5s</>.
+ </para> + </listitem> + </varlistentry> + </variablelist> </sect1> diff --git a/src/backend/access/transam/recovery.conf.sample b/src/backend/access/transam/recovery.conf.sample index b777400..458308c 100644 --- a/src/backend/access/transam/recovery.conf.sample +++ b/src/backend/access/transam/recovery.conf.sample @@ -58,6 +58,15 @@ # #recovery_end_command = '' # +# +# wal_retrieve_retry_interval +# +# specifies an optional interval to wait for WAL to become available when +# a failure occurred when reading WAL from a source for a node in standby +# mode, or when WAL is expected from a source. +# +#wal_retrieve_retry_interval = '5s' +# #--------------------------------------------------------------------------- # RECOVERY TARGET PARAMETERS #--------------------------------------------------------------------------- diff --git a/src/backend/access/transam/xlog.c b/src/backend/access/transam/xlog.c index 629a457..111e53d 100644 --- a/src/backend/access/transam/xlog.c +++ b/src/backend/access/transam/xlog.c @@ -235,6 +235,7 @@ static TimestampTz recoveryTargetTime; static char *recoveryTargetName; static int recovery_min_apply_delay = 0; static TimestampTz recoveryDelayUntilTime; +static int wal_retrieve_retry_interval = 5000; /* options taken from recovery.conf for XLOG streaming */ static bool StandbyModeRequested = false; @@ -4881,6 +4882,26 @@ readRecoveryCommandFile(void) (errmsg_internal("trigger_file = '%s'", TriggerFile))); } + else if (strcmp(item->name, "wal_retrieve_retry_interval") == 0) + { + const char *hintmsg; + + if (!parse_int(item->value, &wal_retrieve_retry_interval, GUC_UNIT_MS, + &hintmsg)) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("parameter \"%s\" requires a temporal value", + "wal_retrieve_retry_interval"), + hintmsg ?
errhint("%s", _(hintmsg)) : 0)); + ereport(DEBUG2, + (errmsg_internal("wal_retrieve_retry_interval = '%s'", item->value))); + + if (wal_retrieve_retry_interval <= 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("\"%s\" must have a value strictly positive", + "wal_retrieve_retry_interval"))); + } else if (strcmp(item->name, "recovery_min_apply_delay") == 0) { const char *hintmsg; @@ -10340,8 +10361,8 @@ static bool WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, bool fetching_ckpt, XLogRecPtr tliRecPtr) { - static pg_time_t last_fail_time = 0; - pg_time_t now; + TimestampTz now = GetCurrentTimestamp(); + TimestampTz last_fail_time = now; /*------- * Standby mode is implemented by a state machine: @@ -10490,15 +10511,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, * machine, so we've exhausted all the options for * obtaining the requested WAL. We're going to loop back * and retry from the archive, but if it hasn't been long - * since last attempt, sleep 5 seconds to avoid - * busy-waiting. + * since last attempt, sleep the amount of time specified + * by wal_retrieve_retry_interval to avoid busy-waiting. */ - now = (pg_time_t) time(NULL); - if ((now - last_fail_time) < 5) - { - pg_usleep(1000000L * (5 - (now - last_fail_time))); - now = (pg_time_t) time(NULL); - } + now = GetCurrentTimestamp(); + if (TimestampSleepDifference(last_fail_time, now, + wal_retrieve_retry_interval)) + now = GetCurrentTimestamp(); last_fail_time = now; currentSource = XLOG_FROM_ARCHIVE; break; @@ -10653,13 +10672,13 @@ WaitForWALToBecomeAvailable(XLogRecPtr RecPtr, bool randAccess, } /* - * Wait for more WAL to arrive. Time out after 5 seconds, - * like when polling the archive, to react to a trigger - * file promptly. + * Wait for more WAL to arrive. Time out after the amount of + * time specified by wal_retrieve_retry_interval, like + * when polling the archive, to react to a trigger file promptly. 
*/ WaitLatch(&XLogCtl->recoveryWakeupLatch, WL_LATCH_SET | WL_TIMEOUT, - 5000L); + wal_retrieve_retry_interval * 1000L); ResetLatch(&XLogCtl->recoveryWakeupLatch); break; } diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 67e0cf9..a4cf717 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -1674,6 +1674,44 @@ TimestampDifferenceExceeds(TimestampTz start_time, } /* + * TimestampSleepDifference -- sleep for the amount of interval time + * specified, reduced by the difference between two timestamps. + * Returns true if sleep is done, false otherwise. + * + * Both inputs must be ordinary finite timestamps (in current usage, + * they'll be results from GetCurrentTimestamp()). Sleep is done + * in steps of 1s to be able to handle process interruptions without + * having to wait for too long. + */ +bool +TimestampSleepDifference(TimestampTz start_time, + TimestampTz stop_time, + int interval_msec) +{ + long secs, total_time; + int microsecs; + + if (TimestampDifferenceExceeds(start_time, stop_time, interval_msec)) + return false; + + TimestampDifference(start_time, stop_time, &secs, &microsecs); + total_time = interval_msec * 1000L - (1000000L * secs + 1L * microsecs); + + while (total_time > 0) + { + long wait_time = 1000000L; /* 1s */ + + if (total_time < wait_time) + wait_time = total_time; + + pg_usleep(wait_time); + HandleStartupProcInterrupts(); + total_time -= wait_time; + } + return true; +} + +/* * Convert a time_t to TimestampTz.
* * We do not use time_t internally in Postgres, but this is provided for use diff --git a/src/include/utils/timestamp.h b/src/include/utils/timestamp.h index 70118f5..e4a7673 100644 --- a/src/include/utils/timestamp.h +++ b/src/include/utils/timestamp.h @@ -217,6 +217,8 @@ extern void TimestampDifference(TimestampTz start_time, TimestampTz stop_time, extern bool TimestampDifferenceExceeds(TimestampTz start_time, TimestampTz stop_time, int msec); +extern bool TimestampSleepDifference(TimestampTz start_time, + TimestampTz stop_time, int interval_msec); /* * Prototypes for functions to deal with integer timestamps, when the native -- 2.2.2
-- Sent via pgsql-hackers mailing list (pgsql-hackers@postgresql.org) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-hackers