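This patch lets a reconnect FSM be configured with a number of "backoff-free" connection attempts, that is, attempts that are retried immediately instead of waiting out the backoff interval. It is aimed at an upcoming database clustering implementation, where it's desirable to try all of the cluster members quickly before backing off to retry them again in sequence.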
Signed-off-by: Ben Pfaff <b...@ovn.org> --- lib/reconnect.c | 52 ++++++++++++++++++++++++++++++--------------- lib/reconnect.h | 3 +++ python/ovs/reconnect.py | 53 ++++++++++++++++++++++++++++++---------------- tests/reconnect.at | 56 ++++++++++++++++++++++++++++++++++++++++++++++++- tests/test-reconnect.c | 8 +++++++ tests/test-reconnect.py | 5 +++++ 6 files changed, 141 insertions(+), 36 deletions(-) diff --git a/lib/reconnect.c b/lib/reconnect.c index 471fb7fc8d61..f91b4c09ae5d 100644 --- a/lib/reconnect.c +++ b/lib/reconnect.c @@ -62,6 +62,7 @@ struct reconnect { long long int last_connected; long long int last_disconnected; unsigned int max_tries; + unsigned int backoff_free_tries; /* These values are simply for statistics reporting, not otherwise used * directly by anything internal. */ @@ -206,6 +207,15 @@ reconnect_get_max_tries(struct reconnect *fsm) return fsm->max_tries; } +/* Sets the number of connection attempts that will be made without backoff to + * 'backoff_free_tries'. Values 0 and 1 both represent a single attempt. */ +void +reconnect_set_backoff_free_tries(struct reconnect *fsm, + unsigned int backoff_free_tries) +{ + fsm->backoff_free_tries = backoff_free_tries; +} + /* Configures the backoff parameters for 'fsm'. 'min_backoff' is the minimum * number of milliseconds, and 'max_backoff' is the maximum, between connection * attempts. The current backoff is also the duration that 'fsm' is willing to @@ -346,7 +356,7 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error) VLOG(fsm->info, "%s: error listening for connections", fsm->name); } - } else { + } else if (fsm->backoff < fsm->max_backoff) { const char *type = fsm->passive ? 
"listen" : "connection"; if (error > 0) { VLOG_INFO("%s: %s attempt failed (%s)", @@ -359,30 +369,38 @@ reconnect_disconnected(struct reconnect *fsm, long long int now, int error) if (fsm->state & (S_ACTIVE | S_IDLE)) { fsm->last_disconnected = now; } + + if (!reconnect_may_retry(fsm)) { + reconnect_transition__(fsm, now, S_VOID); + return; + } + /* Back off. */ - if (fsm->state & (S_ACTIVE | S_IDLE) - && (fsm->last_activity - fsm->last_connected >= fsm->backoff - || fsm->passive)) { + if (fsm->backoff_free_tries > 1) { + fsm->backoff_free_tries--; + fsm->backoff = 0; + } else if (fsm->state & (S_ACTIVE | S_IDLE) + && (fsm->last_activity - fsm->last_connected >= fsm->backoff + || fsm->passive)) { fsm->backoff = fsm->passive ? 0 : fsm->min_backoff; } else { if (fsm->backoff < fsm->min_backoff) { fsm->backoff = fsm->min_backoff; - } else if (fsm->backoff >= fsm->max_backoff / 2) { - fsm->backoff = fsm->max_backoff; - } else { + } else if (fsm->backoff < fsm->max_backoff / 2) { fsm->backoff *= 2; - } - if (fsm->passive) { - VLOG(fsm->info, "%s: waiting %.3g seconds before trying to " - "listen again", fsm->name, fsm->backoff / 1000.0); + VLOG(fsm->info, "%s: waiting %.3g seconds before %s", + fsm->name, fsm->backoff / 1000.0, + fsm->passive ? "trying to listen again" : "reconnect"); } else { - VLOG(fsm->info, "%s: waiting %.3g seconds before reconnect", - fsm->name, fsm->backoff / 1000.0); + if (fsm->backoff < fsm->max_backoff) { + VLOG_INFO("%s: continuing to %s in the background but " + "suppressing further logging", fsm->name, + fsm->passive ? "try to listen" : "reconnect"); + } + fsm->backoff = fsm->max_backoff; } } - - reconnect_transition__(fsm, now, - reconnect_may_retry(fsm) ? 
S_BACKOFF : S_VOID); + reconnect_transition__(fsm, now, S_BACKOFF); } } @@ -397,7 +415,7 @@ reconnect_connecting(struct reconnect *fsm, long long int now) if (fsm->state != S_CONNECTING) { if (fsm->passive) { VLOG(fsm->info, "%s: listening...", fsm->name); - } else { + } else if (fsm->backoff < fsm->max_backoff) { VLOG(fsm->info, "%s: connecting...", fsm->name); } reconnect_transition__(fsm, now, S_CONNECTING); diff --git a/lib/reconnect.h b/lib/reconnect.h index 4446713ce873..9f2d469e2ddd 100644 --- a/lib/reconnect.h +++ b/lib/reconnect.h @@ -51,6 +51,8 @@ int reconnect_get_probe_interval(const struct reconnect *); void reconnect_set_max_tries(struct reconnect *, unsigned int max_tries); unsigned int reconnect_get_max_tries(struct reconnect *); +void reconnect_set_backoff_free_tries(struct reconnect *, + unsigned int backoff_free_tries); void reconnect_set_backoff(struct reconnect *, int min_backoff, int max_backoff); @@ -65,6 +67,7 @@ void reconnect_enable(struct reconnect *, long long int now); void reconnect_disable(struct reconnect *, long long int now); void reconnect_force_reconnect(struct reconnect *, long long int now); +void reconnect_skip_backoff(struct reconnect *); bool reconnect_is_connected(const struct reconnect *); unsigned int reconnect_get_last_connect_elapsed(const struct reconnect *, diff --git a/python/ovs/reconnect.py b/python/ovs/reconnect.py index ec52ebb7affc..34cc76987031 100644 --- a/python/ovs/reconnect.py +++ b/python/ovs/reconnect.py @@ -154,6 +154,7 @@ class Reconnect(object): self.last_connected = None self.last_disconnected = None self.max_tries = None + self.backoff_free_tries = 0 self.creation_time = now self.n_attempted_connections = 0 @@ -242,6 +243,12 @@ class Reconnect(object): self.backoff > self.max_backoff): self.backoff = self.max_backoff + def set_backoff_free_tries(self, backoff_free_tries): + """Sets the number of connection attempts that will be made without + backoff to 'backoff_free_tries'. 
Values 0 and 1 both + represent a single attempt.""" + self.backoff_free_tries = backoff_free_tries + def set_probe_interval(self, probe_interval): """Sets the "probe interval" to 'probe_interval', in milliseconds. If this is zero, it disables the connection keepalive feature. If it is @@ -337,7 +344,7 @@ class Reconnect(object): else: self.info_level("%s: error listening for connections" % self.name) - else: + elif self.backoff < self.max_backoff: if self.passive: type_ = "listen" else: @@ -352,8 +359,15 @@ class Reconnect(object): if (self.state in (Reconnect.Active, Reconnect.Idle)): self.last_disconnected = now + if not self.__may_retry(): + self._transition(now, Reconnect.Void) + return + # Back off - if (self.state in (Reconnect.Active, Reconnect.Idle) and + if self.backoff_free_tries > 1: + self.backoff_free_tries -= 1 + self.backoff = 0 + elif (self.state in (Reconnect.Active, Reconnect.Idle) and (self.last_activity - self.last_connected >= self.backoff or self.passive)): if self.passive: @@ -363,23 +377,26 @@ class Reconnect(object): else: if self.backoff < self.min_backoff: self.backoff = self.min_backoff - elif self.backoff >= self.max_backoff / 2: - self.backoff = self.max_backoff - else: + elif self.backoff < self.max_backoff / 2: self.backoff *= 2 - - if self.passive: - self.info_level("%s: waiting %.3g seconds before trying " - "to listen again" - % (self.name, self.backoff / 1000.0)) + if self.passive: + action = "trying to listen again" + else: + action = "reconnect" + self.info_level("%s: waiting %.3g seconds before %s" + % (self.name, self.backoff / 1000.0, + action)) else: - self.info_level("%s: waiting %.3g seconds before reconnect" - % (self.name, self.backoff / 1000.0)) - - if self.__may_retry(): - self._transition(now, Reconnect.Backoff) - else: - self._transition(now, Reconnect.Void) + if self.backoff < self.max_backoff: + if self.passive: + action = "try to listen" + else: + action = "reconnect" + self.info_level("%s: continuing to %s in 
the " + "background but suppressing further " + "logging" % (self.name, action)) + self.backoff = self.max_backoff + self._transition(now, Reconnect.Backoff) def connecting(self, now): """Tell this FSM that a connection or listening attempt is in progress. @@ -390,7 +407,7 @@ class Reconnect(object): if self.state != Reconnect.ConnectInProgress: if self.passive: self.info_level("%s: listening..." % self.name) - else: + elif self.backoff < self.max_backoff: self.info_level("%s: connecting..." % self.name) self._transition(now, Reconnect.ConnectInProgress) diff --git a/tests/reconnect.at b/tests/reconnect.at index c88ca785cad2..59c95d95bdd3 100644 --- a/tests/reconnect.at +++ b/tests/reconnect.at @@ -1037,6 +1037,60 @@ timeout ]) ###################################################################### +RECONNECT_CHECK([backoff-free tries work], + [set-backoff-free-tries 2 +enable + +# Connection fails quickly. +run +connect-failed ECONNREFUSED + +# No backoff. +run +timeout + +# Connection fails quickly again. +run +connect-failed ECONNREFUSED + +# Back off for 1000 ms. +run +timeout +], + [### t=1000 ### +set-backoff-free-tries 2 +enable + in BACKOFF for 0 ms (0 ms backoff) + +# Connection fails quickly. +run + should connect +connect-failed ECONNREFUSED + 0 successful connections out of 1 attempts, seqno 0 + +# No backoff. +run + should connect +timeout + advance 0 ms + +# Connection fails quickly again. +run + should connect +connect-failed ECONNREFUSED + in BACKOFF for 0 ms (1000 ms backoff) + 0 successful connections out of 2 attempts, seqno 0 + +# Back off for 1000 ms. 
+run +timeout + advance 1000 ms + +### t=2000 ### + in BACKOFF for 1000 ms (1000 ms backoff) +]) + +###################################################################### RECONNECT_CHECK([max-tries of 1 honored], [set-max-tries 1 enable @@ -1090,7 +1144,7 @@ timeout run should disconnect disconnected - in VOID for 0 ms (1000 ms backoff) + in VOID for 0 ms (0 ms backoff) 1 successful connections out of 1 attempts, seqno 2 disconnected disconnected at 11000 ms (0 ms ago) diff --git a/tests/test-reconnect.c b/tests/test-reconnect.c index 72252b8f707b..5a14e7fe58da 100644 --- a/tests/test-reconnect.c +++ b/tests/test-reconnect.c @@ -208,6 +208,12 @@ do_set_max_tries(struct ovs_cmdl_context *ctx) } static void +do_set_backoff_free_tries(struct ovs_cmdl_context *ctx) +{ + reconnect_set_backoff_free_tries(reconnect, atoi(ctx->argv[1])); +} + +static void diff_stats(const struct reconnect_stats *old, const struct reconnect_stats *new, int delta) @@ -284,6 +290,8 @@ static const struct ovs_cmdl_command all_commands[] = { { "advance", NULL, 1, 1, do_advance, OVS_RO }, { "timeout", NULL, 0, 0, do_timeout, OVS_RO }, { "set-max-tries", NULL, 1, 1, do_set_max_tries, OVS_RO }, + { "set-backoff-free-tries", NULL, 1, 1, do_set_backoff_free_tries, + OVS_RO }, { "passive", NULL, 0, 0, do_set_passive, OVS_RO }, { "listening", NULL, 0, 0, do_listening, OVS_RO }, { "listen-error", NULL, 1, 1, do_listen_error, OVS_RO }, diff --git a/tests/test-reconnect.py b/tests/test-reconnect.py index 8132fd9258ef..6cd052878eb1 100644 --- a/tests/test-reconnect.py +++ b/tests/test-reconnect.py @@ -104,6 +104,10 @@ def do_set_max_tries(arg): r.set_max_tries(int(arg)) +def do_set_backoff_free_tries(arg): + r.set_backoff_free_tries(int(arg)) + + def diff_stats(old, new, delta): if (old.state != new.state or old.state_elapsed != new.state_elapsed or @@ -173,6 +177,7 @@ def main(): "advance": do_advance, "timeout": do_timeout, "set-max-tries": do_set_max_tries, + "set-backoff-free-tries": 
do_set_backoff_free_tries, "passive": do_set_passive, "listening": do_listening, "listen-error": do_listen_error -- 2.10.2 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev