This commit enables additional datapath health checks. The checks are enabled only on a PMD heartbeat failure: when a PMD thread misses three successive heartbeats, additional health checks need to be performed on that thread to confirm the failure.
The datapath health is monitored periodically from the keepalive thread.
Note that the PMD health checks are performed only on the PMD threads whose
health check is enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 43 +++++++++++++++++++++++++++++++++++
 lib/keepalive.c   | 68 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/keepalive.h   | 16 +++++++++++++
 3 files changed, 127 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 33b6fd0..b18b1b6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -974,6 +974,48 @@ sorted_poll_thread_list(struct dp_netdev *dp,
     *n = k;
 }
 
+/* Placeholder for the additional health checks run on 'pmd'. */
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+    /* Nothing */
+}
+
+/* Runs the additional health checks on every PMD thread whose health check
+ * is enabled in the keepalive process list. */
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+    static struct hmap *process_list = NULL;
+    if (!process_list) {
+        ka_load_process_list(&process_list);
+        if (!process_list) {
+            /* Keepalive is disabled, so there is no list to walk. */
+            return;
+        }
+    }
+
+    struct ka_process_info *pinfo;
+    HMAP_FOR_EACH (pinfo, node, process_list) {
+        int core_id = pinfo->core_id;
+        struct dp_netdev_pmd_thread *pmd;
+
+        /* Check only PMD threads whose health check is enabled. */
+        if (OVS_LIKELY(pinfo->healthcheck == PMD_HC_DISABLE)) {
+            continue;
+        }
+
+        pmd = dp_netdev_get_pmd(dp, core_id);
+        if (!pmd) {
+            continue;
+        }
+
+        pmd_health_check(pmd);
+        /* Drop the reference taken by dp_netdev_get_pmd(). */
+        dp_netdev_pmd_unref(pmd);
+    }
+}
+
 static void *
 ovs_keepalive(void *f_)
 {
@@ -985,6 +1027,7 @@ ovs_keepalive(void *f_)
         int n_pmds = cmap_count(&dp->poll_threads) - 1;
         if (n_pmds > 0) {
             dispatch_heartbeats();
+            get_datapath_health(dp);
             get_ka_stats();
         }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 227c2e1..89cf802 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -115,6 +115,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
     ka_pinfo->heartbeats = true;
     ka_pinfo->core_id = core_num;
     ovs_strlcpy(ka_pinfo->name, proc_name, sizeof ka_pinfo->name);
+    ka_pinfo->healthcheck = PMD_HC_DISABLE;
 
     hmap_insert(&ka_info->process_list, &ka_pinfo->node, hash);
 
@@ -186,6 +187,63 @@ ka_mark_pmd_thread_sleep(void)
 }
 
 void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+    /* Same lookup and locking as any other state change. */
+    ka_set_pmd_health_check_state(core_id, PMD_HC_ENABLE);
+}
+
+/* Turns the additional health checks off for the PMD thread on 'core_id'. */
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+    ka_set_pmd_health_check_state(core_id, PMD_HC_DISABLE);
+}
+
+/* Returns the health check state of the PMD thread on 'core_id', or
+ * PMD_HC_DISABLE when keepalive is disabled or the thread is not registered.
+ * Takes 'proclist_mutex' internally, so the caller must not hold it. */
+enum pmdhealth_check
+ka_get_pmd_health_check_state(unsigned core_id)
+    OVS_EXCLUDED(ka_info->proclist_mutex)
+{
+    enum pmdhealth_check hc = PMD_HC_DISABLE;
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                hc = pinfo->healthcheck;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+
+    return hc;
+}
+
+/* Sets the health check state of the PMD thread on 'core_id' to 'state'.
+ * Does nothing when keepalive is disabled or the thread is not registered. */
+void
+ka_set_pmd_health_check_state(unsigned core_id, enum pmdhealth_check state)
+{
+    if (ka_is_enabled()) {
+        struct ka_process_info *pinfo;
+        int tid = ka_get_pmd_tid(core_id);
+        ovs_mutex_lock(&ka_info->proclist_mutex);
+        HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                                 &ka_info->process_list) {
+            if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+                pinfo->healthcheck = state;
+            }
+        }
+        ovs_mutex_unlock(&ka_info->proclist_mutex);
+    }
+}
+
+void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
                     uint64_t last_alive)
 {
@@ -203,6 +261,16 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
     ovs_mutex_unlock(&ka_info->proclist_mutex);
 }
 
+/* Points '*process_list' at the keepalive process list.  Leaves it untouched
+ * when keepalive is disabled. */
+void
+ka_load_process_list(struct hmap **process_list)
+{
+    if (ka_is_enabled()) {
+        *process_list = &ka_info->process_list;
+    }
+}
+
 /* Retrieve and return the keepalive timer interval from OVSDB. */
 static uint32_t
 get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index cedc390..61697b2 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -40,11 +40,19 @@ enum keepalive_state {
     KA_STATE_CHECK = 7
 };
 
+enum pmdhealth_check {
+    PMD_HC_DISABLE,
+    PMD_HC_ENABLE,
+    PMD_HC_PROGRESS,
+    PMD_HC_COMPLETE
+};
+
 struct ka_process_info {
     char name[16];
     int tid;
     int core_id;
     bool heartbeats;
+    enum pmdhealth_check healthcheck;
     enum keepalive_state core_state;
     uint64_t core_last_seen_times;
     struct hmap_node node;
@@ -95,6 +103,13 @@ void ka_unregister_thread(int, bool);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
 
+void ka_init_pmd_health_check(void);
+void ka_enable_pmd_health_check(unsigned);
+void ka_disable_pmd_health_check(unsigned);
+bool ka_is_pmdhealth_check_enabled(unsigned);
+enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
+void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+
 void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
@@ -102,6 +117,7 @@ int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 void get_ka_stats(void);
 struct smap *ka_stats_run(void);
+void ka_load_process_list(struct hmap **);
 
 void dispatch_heartbeats(void);
 
 #endif /* keepalive.h */
-- 
2.4.11

_______________________________________________
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev