From: zhen wang <zhew...@nvidia.com>

When ovn-northd works in HA mode, an ovn-northd instance in standby
mode does not update the probe interval. If the SB/NB raft leader and
the active ovn-northd instance are killed by a system power outage,
the standby ovn-northd instance would never detect the failure.
This patch addresses the problem by updating the probe value in the
main loop.

Signed-off-by: zhen wang <zhew...@nvidia.com>
---
 northd/northd.c     | 25 -------------------------
 northd/ovn-northd.c | 30 ++++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+), 25 deletions(-)

diff --git a/northd/northd.c b/northd/northd.c
index 621e83175..91635b93b 100644
--- a/northd/northd.c
+++ b/northd/northd.c
@@ -73,10 +73,6 @@ static struct eth_addr svc_monitor_mac_ea;
  * Otherwise, it will avoid using it.  The default is true. */
 static bool use_ct_inv_match = true;
 
-/* Default probe interval for NB and SB DB connections. */
-#define DEFAULT_PROBE_INTERVAL_MSEC 5000
-static int northd_probe_interval_nb = 0;
-static int northd_probe_interval_sb = 0;
 #define MAX_OVN_TAGS 4096
 
 /* Pipeline stages. */
@@ -14190,20 +14186,6 @@ build_meter_groups(struct northd_context *ctx,
     }
 }
 
-static int
-get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
-{
-    int default_interval = (db && !stream_or_pstream_needs_probes(db)
-                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
-    int interval = smap_get_int(&nb->options,
-                                "northd_probe_interval", default_interval);
-
-    if (interval > 0 && interval < 1000) {
-        interval = 1000;
-    }
-    return interval;
-}
-
 static void
 ovnnb_db_run(struct northd_context *ctx,
              struct ovsdb_idl_index *sbrec_chassis_by_name,
@@ -14290,13 +14272,6 @@ ovnnb_db_run(struct northd_context *ctx,
 
     smap_destroy(&options);
 
-    /* Update the probe interval. */
-    northd_probe_interval_nb = get_probe_interval(ctx->ovnnb_db, nb);
-    northd_probe_interval_sb = get_probe_interval(ctx->ovnsb_db, nb);
-
-    ovsdb_idl_set_probe_interval(ctx->ovnnb_idl, northd_probe_interval_nb);
-    ovsdb_idl_set_probe_interval(ctx->ovnsb_idl, northd_probe_interval_sb);
-
     use_parallel_build =
         (smap_get_bool(&nb->options, "use_parallel_build", false) &&
          can_parallelize_hashes(false));
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 42c0ad644..39aa96055 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -65,6 +65,10 @@ static const char *ssl_private_key_file;
 static const char *ssl_certificate_file;
 static const char *ssl_ca_cert_file;
 
+/* Default probe interval for NB and SB DB connections. */
+#define DEFAULT_PROBE_INTERVAL_MSEC 5000
+static int northd_probe_interval_nb = 0;
+static int northd_probe_interval_sb = 0;
 static bool use_parallel_build = true;
 
 static const char *rbac_chassis_auth[] =
@@ -576,6 +580,20 @@ update_ssl_config(void)
     }
 }
 
+static int
+get_probe_interval(const char *db, const struct nbrec_nb_global *nb)
+{
+    int default_interval = (db && !stream_or_pstream_needs_probes(db)
+                            ? 0 : DEFAULT_PROBE_INTERVAL_MSEC);
+    int interval = smap_get_int(&nb->options,
+                                "northd_probe_interval", default_interval);
+
+    if (interval > 0 && interval < 1000) {
+        interval = 1000;
+    }
+    return interval;
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -997,6 +1015,18 @@ main(int argc, char *argv[])
             poll_immediate_wake();
         }
 
+        const struct nbrec_nb_global *nb =
+            nbrec_nb_global_first(ovnnb_idl_loop.idl);
+        /* Update the probe interval. */
+        if (nb) {
+            northd_probe_interval_nb = get_probe_interval(ovnnb_db, nb);
+            northd_probe_interval_sb = get_probe_interval(ovnsb_db, nb);
+        }
+        ovsdb_idl_set_probe_interval(ovnnb_idl_loop.idl,
+                                     northd_probe_interval_nb);
+        ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl,
+                                     northd_probe_interval_sb);
+
         if (reset_ovnsb_idl_min_index) {
             VLOG_INFO("Resetting southbound database cluster state");
             ovsdb_idl_reset_min_index(ovnsb_idl_loop.idl);
-- 
2.20.1

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to