This commit adds support for checking the packet statistics on the ports polled by a PMD thread. If packets aren't processed due to a PMD thread stall/deadlock, the statistics won't update, and this can be used by a monitoring framework to confirm a PMD failure.
This mechanism has a limitation when MQ is enabled. In some cases, the queues of a DPDK port can be polled by different PMD threads. Even if one PMD thread stalls, the port statistics will still be incremented because another queue is being processed by a different PMD. In this case, the function can return the active state based on the packets processed. Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com> --- lib/dpif-netdev.c | 25 +++++++++++--- lib/keepalive.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ lib/keepalive.h | 5 +++ 3 files changed, 122 insertions(+), 5 deletions(-) diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a6edf4d..dca8e8e 100644 --- a/lib/dpif-netdev.c +++ b/lib/dpif-netdev.c @@ -984,8 +984,9 @@ sorted_poll_thread_list(struct dp_netdev *dp, static void pmd_health_check(struct dp_netdev_pmd_thread *pmd) { - int port_link_status = 0; struct rxq_poll *poll; + int port_link_status = 0; + int port_stats = 0; struct svec pmd_poll_list; svec_init(&pmd_poll_list); @@ -1000,22 +1001,36 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd) int i = 0; SVEC_FOR_EACH (i, port_name, &pmd_poll_list) { struct netdev *dev = netdev_from_name(port_name); + VLOG_DBG("Keepalive: Checking port %s", port_name); if (dev) { char *link_state = netdev_get_carrier(dev) ? 
"up" : "down"; ka_info_update_port_status(port_name, 0, link_state, pmd->core_id, i); + if (!strcmp(link_state, "up")) { + ka_info_update_port_statistics(dev, pmd->core_id, i); + } netdev_close(dev); } } svec_destroy(&pmd_poll_list); - port_link_status = ka_get_polled_ports_status(pmd->core_id); - int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id); - if (PMD_HC_COMPLETE == pmd_hc_state) { - if (port_link_status == ACTIVE_RUN_STATE) { + switch (pmd_hc_state) { + case PMD_HC_ENABLE: + ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS); + break; + case PMD_HC_PROGRESS: + ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE); + break; + case PMD_HC_COMPLETE: + port_link_status = ka_get_polled_ports_status(pmd->core_id); + port_stats = ka_get_polled_ports_stats(pmd->core_id); + + if (port_link_status == ACTIVE_RUN_STATE && + port_stats == ACTIVE_RUN_STATE ) { ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0); } + break; } } diff --git a/lib/keepalive.c b/lib/keepalive.c index c306839..7e56dd4 100644 --- a/lib/keepalive.c +++ b/lib/keepalive.c @@ -23,6 +23,7 @@ #include "dpdk.h" #include "keepalive.h" #include "lib/vswitch-idl.h" +#include "netdev-dpdk.h" #include "openvswitch/dynamic-string.h" #include "openvswitch/vlog.h" #include "ovs-thread.h" @@ -30,6 +31,7 @@ #include "unixctl.h" VLOG_DEFINE_THIS_MODULE(keepalive); +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20); static bool keepalive_enable = false; /* Keepalive disabled by default */ static bool ka_init_status = ka_init_failure; /* Keepalive initialization */ @@ -462,6 +464,31 @@ enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id) } } +enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id) +{ + if (!ka_info) { + return FAILURE_STATE; + } + + int failed = 0; + int n_ports = ka_info->ext_stats[core_id].num_poll_ports; + for (int i = 0; i < n_ports; i++) { + int state; + state = + 
ka_info->ext_stats[core_id].port_stats[i].state[PORT_STATS_CHECK]; + if (state == FAILURE_STATE) { + failed = 1; + break; + } + } + + if (!failed) { + return ACTIVE_RUN_STATE; + } else { + return FAILURE_STATE; + } +} + void ka_info_update_port_status(const char *port, int qid OVS_UNUSED, char *link_state, int core_id, int idx) @@ -489,6 +516,76 @@ ka_info_update_port_status(const char *port, int qid OVS_UNUSED, state; } +void +ka_info_update_port_statistics(const struct netdev *netdev, + int core_id, int idx) +{ + int error; + int state = FAILURE_STATE; + + if (!ka_info) { + VLOG_ERR_RL(&rl, "Keepalive disabled"); + return; + } + ka_info->ext_stats[core_id].num_poll_ports = idx; + + int pmd_hc_state = ka_get_pmd_health_check_state(core_id); + if (PMD_HC_ENABLE == pmd_hc_state) { + struct netdev_stats *stats; + stats = &ka_info->ext_stats[core_id].port_stats[idx].stats; + error = netdev_get_stats(netdev, stats); + if (error) { + VLOG_ERR("\tCouldn't retrieve stats (%s)", ovs_strerror(error)); + } + state = ACTIVE_RUN_STATE; + } + + if (PMD_HC_PROGRESS == pmd_hc_state) { + struct netdev_stats temp_stats; + + error = netdev_get_stats(netdev, &temp_stats); + if (!error) { + uint64_t tx_pkts_cnt = 0; + uint64_t rx_pkts_cnt = 0; + int skip_tx_check = 0, skip_rx_check = 0; + + struct netdev_stats *prev_stats = + &ka_info->ext_stats[core_id].port_stats[idx].stats; + + if (!temp_stats.tx_packets && !prev_stats->tx_packets) { + VLOG_DBG_RL(&rl, "\tNo packets transmitted"); + skip_tx_check = 1; + } else { + tx_pkts_cnt = temp_stats.tx_packets - + prev_stats->tx_packets; + } + + if (!temp_stats.rx_packets && !prev_stats->rx_packets) { + VLOG_DBG_RL(&rl, "\tNo packets received"); + skip_rx_check = 1; + } else { + rx_pkts_cnt = temp_stats.rx_packets - + prev_stats->rx_packets; + } + + if (skip_tx_check && skip_rx_check) { + VLOG_DBG_RL(&rl, "\tNo active traffic"); + state = ACTIVE_RUN_STATE; + } else if ((!skip_tx_check && tx_pkts_cnt) || + (!skip_rx_check && rx_pkts_cnt)) { + 
VLOG_DBG_RL(&rl, "\tStats updated"); + state = ACTIVE_RUN_STATE; + } else { + VLOG_DBG("\tPMD failure"); + state = FAILURE_STATE; + } + } + } + + ka_info->ext_stats[core_id].port_stats[idx].state[PORT_STATS_CHECK] = + state; +} + static void ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED, const char *argv[] OVS_UNUSED, void *ka_info_) diff --git a/lib/keepalive.h b/lib/keepalive.h index ff4aa3c..37f1e83 100644 --- a/lib/keepalive.h +++ b/lib/keepalive.h @@ -27,6 +27,8 @@ #define KA_DP_MAXCORES 128 #endif /* DPDK_NETDEV */ +#include "netdev.h" + struct smap; enum keepalive_state { @@ -68,6 +70,7 @@ struct poll_port_stats { const char *port; char *link_state; int state[PORT_NUM_CHECKS]; + struct netdev_stats stats; }; struct pmd_extended_stats { @@ -135,5 +138,7 @@ void ka_load_process_list(struct hmap **); void dispatch_heartbeats(void); void ka_info_update_port_status(const char *,int,char *,int,int); enum pmdhealth_status ka_get_polled_ports_status(unsigned); +void ka_info_update_port_statistics(const struct netdev *,int,int); +enum pmdhealth_status ka_get_polled_ports_stats(unsigned); #endif /* keepalive.h */ -- 2.4.11 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev