[ovs-dev] [RFC PATCH v2 11/19] keepalive: Add support to query keepalive status.

2017-06-12 Thread Bhanuprakash Bodireddy
This commit adds support to query if keepalive status is
enabled/disabled.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3786d47..974d17a 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -354,6 +354,19 @@ out:
 ds_destroy();
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  is_ka_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -397,6 +410,8 @@ ka_init(const struct smap *ovs_other_config)
 
 unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
   ka_unixctl_pmd_health_show, ka_shm);
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
 
 ovsthread_once_done(_enable);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v2 10/19] keepalive: Add support to query keepalive statistics.

2017-06-12 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive statistics stored in
posix shared memory block. Datapath health status can be retrieved as
follows:

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status  : Enabled
keepalive interval: 1000 ms

CORESTATE   LAST SEEN TIMESTAMP
 0  ALIVE   8632183482028293
 1  ALIVE   8632183482028425
 2  ALIVE   8632190191004294
 3  ALIVE   8632183482028525
 4  GONE8612183482028117
 5  ALIVE   8632190191004984
 6  ALIVE   8632190191005713
 7  ALIVE   8632190191006555

Datapath status   : BAD

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 78 +
 1 file changed, 78 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index ff9ce2b..3786d47 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,8 +24,10 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -279,6 +281,79 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_shm_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\t\tKeepalive status\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  is_ka_enabled() ? "Enabled" : "Disabled");
+
+if (!is_ka_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_shm *ka_shm = (struct keepalive_shm *)ka_shm_;
+if (!ka_shm) {
+ds_put_format(, "KeepAlive: Invalid shared memory block\n");
+goto out;
+}
+
+ds_put_format(,
+  "\nCORE\tSTATE\tLAST SEEN TIMESTAMP\n");
+
+int datapath_failure = 0;
+for (int idx_core = 0; idx_core < KEEPALIVE_MAXCORES; idx_core++) {
+char *state = NULL;
+if (ka_shm->core_state[idx_core] == KA_STATE_UNUSED ||
+ka_shm->core_state[idx_core] == KA_STATE_SLEEP)
+continue;
+
+switch (ka_shm->core_state[idx_core]) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+datapath_failure++;
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+ds_put_format(, "%2d\t%s\t%"PRIu64"\n",
+  idx_core, state, ka_shm->core_last_seen_times[idx_core]);
+}
+
+ds_put_format(, "\n");
+ds_put_format(, "Datapath Status   : %s\n",
+  datapath_failure ? "BAD" : "HEALTHY");
+
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -320,6 +395,9 @@ ka_init(const struct smap *ovs_other_config)
 VLOG_INFO("OvS Keepalive disabled.");
 }
 
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, ka_shm);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v2 16/19] keepalive: Check the PMD cycle stats as part of PMD health checks.

2017-06-12 Thread Bhanuprakash Bodireddy
This commit adds support for checking the PMD cycle stats. If the cycles
aren't changing for a duration of time, this can be flagged as a possible
PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 18 +++---
 lib/dpif-netdev.h |  6 ++
 lib/keepalive.c   | 52 
 lib/keepalive.h   |  3 +++
 4 files changed, 72 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 06ca7fb..dd9d396 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -327,12 +327,6 @@ enum dp_stat_type {
 DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
-PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
-PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -977,6 +971,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 struct rxq_poll *poll;
 int port_link_status = 0;
 int port_stats = 0;
+int pmd_polling = 0;
+uint64_t cycles[PMD_N_CYCLES];
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -1011,6 +1007,13 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 }
 svec_destroy(_poll_list);
 
+/* Update the cycle counters in SHM. */
+for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+atomic_read_relaxed(>cycles.n[idx], [idx]);
+}
+
+pmd_polling = ka_shm_update_pmd_cycles(pmd->core_id, cycles);
+
 port_link_status = ka_get_polled_ports_status(pmd->core_id);
 port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
@@ -1024,7 +1027,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 break;
 case PMD_HC_COMPLETE:
 if (port_link_status == ACTIVE_RUN_STATE &&
-   port_stats == ACTIVE_RUN_STATE ) {
+  port_stats == ACTIVE_RUN_STATE &&
+pmd_polling == ACTIVE_RUN_STATE) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
 break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@ extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
+PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
+PMD_N_CYCLES
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index b702ebc..84813bf 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -527,6 +527,58 @@ ka_shm_update_port_statistics(const struct netdev *netdev,
 state;
 }
 
+int
+ka_shm_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+int pmd_state = ACTIVE_RUN_STATE;
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return -1;
+}
+
+uint64_t total_cycles = 0;
+for (int i = 0; i < PMD_N_CYCLES; i++) {
+if (cycles[i] > 0) {
+total_cycles += cycles[i];
+}
+}
+
+if (!total_cycles)
+return -1;
+
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+if (PMD_HC_ENABLE == pmd_hc_state) {
+ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_POLLING] =
+   cycles[PMD_CYCLES_POLLING];
+
+ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING] =
+   cycles[PMD_CYCLES_PROCESSING];
+}
+
+if (PMD_HC_PROGRESS == pmd_hc_state) {
+uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0;
+uint64_t prev_poll_cycles =
+ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_POLLING];
+uint64_t prev_proc_cycles =
+ka_shm->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING];
+
+VLOG_DBG_RL(, "Keepalive: Going to check the PMD thresholds now.");
+
+polling_cycles_cnt = cycles[PMD_CYCLES_POLLING] - prev_poll_cycles;
+
+proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING]
+   - prev_proc_cycles;
+
+if (!polling_cycles_cnt && !proc_cycles_cnt) {
+VLOG_DBG("PMD FAILURE!");
+pmd_state = FAILURE_STATE;
+}
+}
+
+return pmd_state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_shm_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 1f1f1c1..7501065 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -26,6 +26,7 @@
 #define KEEPALIVE_MAXCORES 128
 #endif /* DPDK_NETDEV

[ovs-dev] [RFC PATCH v2 15/19] keepalive: Check the packet statistics as part of PMD health checks.

2017-06-12 Thread Bhanuprakash Bodireddy
This commit adds support for checking the packet statistics on the port
polled by a PMD thread. If packets aren't processed due to a PMD thread
stall/deadlock, the statistics won't update, and this can be used by the
monitoring framework to confirm a PMD failure.

This mechanism has a limitation when multiqueue (MQ) is enabled. In some
cases the queues of a DPDK port can be polled by different PMD threads. Even
if one PMD thread stalls, the port statistics will still be incremented due
to another queue being processed by a different PMD. The function can return
the active state in this case, since packets are still being processed.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  23 +++--
 lib/keepalive.c   | 100 ++
 lib/keepalive.h   |   6 
 3 files changed, 126 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 32cdb9f..06ca7fb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -974,8 +974,9 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 static void
 pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-int port_link_status = 0;
 struct rxq_poll *poll;
+int port_link_status = 0;
+int port_stats = 0;
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -998,6 +999,12 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 ka_shm_update_port_status(netdev_rxq_get_name(poll->rxq->rx),
   netdev_rxq_get_queue_id(poll->rxq->rx),
   link_state, pmd->core_id, i);
+
+if (!strcmp(link_state, "up")) {
+ka_shm_update_port_statistics(poll->rxq->port->netdev,
+pmd->core_id, i);
+}
+
 break;
 }
 }
@@ -1005,12 +1012,22 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 svec_destroy(_poll_list);
 
 port_link_status = ka_get_polled_ports_status(pmd->core_id);
+port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
 int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
-if (PMD_HC_COMPLETE == pmd_hc_state) {
-if (port_link_status == ACTIVE_RUN_STATE) {
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE);
+break;
+case PMD_HC_COMPLETE:
+if (port_link_status == ACTIVE_RUN_STATE &&
+   port_stats == ACTIVE_RUN_STATE ) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
+break;
 }
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 86d39db..b702ebc 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,6 +24,7 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
@@ -389,6 +390,33 @@ enum pmdhealth_status ka_get_polled_ports_status(unsigned 
core_id)
 }
 }
 
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return -1;
+}
+
+int failed = 0;
+int n_ports = ka_shm->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_shm->ext_stats[core_id].port_stats[i].state[PORT_STATS_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
 void
 ka_shm_update_port_status(const char *port, int qid, char *link_state,
   int core_id, int idx)
@@ -427,6 +455,78 @@ ka_shm_update_port_status(const char *port, int qid, char 
*link_state,
state;
 }
 
+void
+ka_shm_update_port_statistics(const struct netdev *netdev,
+  int core_id, int idx)
+{
+int error;
+int state = FAILURE_STATE;
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return;
+}
+
+ka_shm->ext_stats[core_id].num_poll_ports = idx;
+
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+if (PMD_HC_ENABLE == pmd_hc_state) {
+struct netdev_stats *stats;
+stats = _shm->ext_stats[core_id].port_stats[idx].stats;
+error = n

[ovs-dev] [RFC PATCH v2 17/19] netdev-dpdk: Enable PMD health checks on heartbeat failure.

2017-06-12 Thread Bhanuprakash Bodireddy
The keepalive thread sends heartbeats to PMD thread and when PMD fails to
respond to successive heartbeats the PMD is potentially stalled. The PMD
state transition is as below:

ALIVE -> MISSING -> DEAD -> GONE

This commit enables PMD healthchecks when PMD doesn't respond to
heartbeats. This is needed to handle false negatives. With this commit
the new state transition is as below:

ALIVE -> MISSING -> DEAD -> CHECK -> GONE

A PMD health checking state is introduced and will immediately kick in when
the PMD gets into the DEAD state. As part of this, the following are considered:

  - Link status of the ports polled by PMD thread.
  - Statistics of the ports polled by PMD thread.
  - PMD polling and processing cycles.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.h   |  3 +++
 lib/netdev-dpdk.c | 55 +--
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/lib/keepalive.h b/lib/keepalive.h
index 7501065..36789ee 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -71,6 +71,9 @@ struct keepalive_shm {
 /* Last seen timestamp of the core */
 uint64_t core_last_seen_times[KEEPALIVE_MAXCORES];
 
+/* Number of PMD failures */
+uint32_t core_failures[KEEPALIVE_MAXCORES];
+
 /* Store pmd thread tid */
 pid_t thread_id[KEEPALIVE_MAXCORES];
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 24a87bb..15c8c68 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -606,6 +606,51 @@ dpdk_failcore_cb(void *ptr_data, const int core_id)
 }
 }
 
+static void
+dpdk_ka_handle_failure(enum keepalive_state fail_state, const int core_id,
+   const enum rte_keepalive_state core_state,
+   uint64_t last_alive, struct keepalive_shm *ka_shm)
+{
+if (fail_state == KA_STATE_DEAD) {
+/* If process is in DEFUNC/UNINTERRUPTIBLE/TRACED state it is inactive
+ * and no additional health checks are needed. */
+uint32_t tid = ka_get_tid(core_id);
+if (process_is_active(tid)) {
+   /* Enable PMD health check only when PMD is in 'RUNNING' state and
+* still doesn't respond to heartbeats. Health checks are needed to
+* analyze other stats as we are in penultimate state of declaring
+* PMD as failed. */
+ka_enable_pmd_health_check(core_id);
+}
+ka_set_pmd_state_ts(core_id, KA_STATE_DEAD, last_alive);
+}
+
+if (fail_state == KA_STATE_GONE) {
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+break;
+case PMD_HC_DISABLE:
+VLOG_DBG_RL(, "Health check disabled for PMD core:%d", core_id);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_state_ts(core_id, KA_STATE_CHECK, last_alive);
+break;
+
+case PMD_HC_COMPLETE:
+ka_shm->core_failures[core_id]++;
+ka_set_pmd_state_ts(core_id, core_state, last_alive);
+ka_disable_pmd_health_check(core_id);
+break;
+
+default:
+VLOG_DBG_RL(, "Unknown health check state %d", pmd_hc_state);
+OVS_NOT_REACHED();
+}
+}
+}
+
 /* Update the core state in shared memory.
  *
  * This function shall be invoked periodically to write the core status and
@@ -631,10 +676,16 @@ dpdk_ka_update_core_state(void *ptr_data, const int 
core_id,
 case RTE_KA_STATE_MISSING:
 ka_set_pmd_state_ts(core_id, KA_STATE_ALIVE, last_alive);
 break;
-case RTE_KA_STATE_DOZING:
-case RTE_KA_STATE_SLEEP:
 case RTE_KA_STATE_DEAD:
+dpdk_ka_handle_failure(KA_STATE_DEAD, core_id, core_state,
+   last_alive, ka_shm);
+break;
 case RTE_KA_STATE_GONE:
+dpdk_ka_handle_failure(KA_STATE_GONE, core_id, core_state,
+   last_alive, ka_shm);
+break;
+case RTE_KA_STATE_DOZING:
+case RTE_KA_STATE_SLEEP:
 ka_set_pmd_state_ts(core_id, core_state, last_alive);
 break;
 case RTE_KA_STATE_UNUSED:
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] process: Consolidate process related APIs.

2017-06-20 Thread Bhanuprakash Bodireddy
As part of retrieving system statistics, process status APIs along with
helper functions were implemented. Some of them are very generic and can
be reused by other subsystems.

Move the APIs in system-stats.c to process.c and util.c and make them
available. This patch doesn't change any functionality.

CC: Ben Pfaff <b...@ovn.org>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
v1->v2
  * Move ticks_to_ms() from util.c to process.c
  * Verify the changes and test it by enabling statistics using,
 $ovs-vsctl set Open_vSwitch . other_config:enable-statistics=true

 lib/process.c   | 189 
 lib/process.h   |  12 +++
 lib/util.c  |  68 +
 lib/util.h  |   3 +
 vswitchd/system-stats.c | 251 +---
 5 files changed, 273 insertions(+), 250 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index e9d0ba9..3e119b5 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -33,6 +33,7 @@
 #include "poll-loop.h"
 #include "signals.h"
 #include "socket-util.h"
+#include "timeval.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
 
@@ -40,6 +41,13 @@ VLOG_DEFINE_THIS_MODULE(process);
 
 COVERAGE_DEFINE(process_start);
 
+#ifdef __linux__
+#define LINUX 1
+#include 
+#else
+#define LINUX 0
+#endif
+
 struct process {
 struct ovs_list node;
 char *name;
@@ -50,6 +58,15 @@ struct process {
 int status;
 };
 
+struct raw_process_info {
+unsigned long int vsz;  /* Virtual size, in kB. */
+unsigned long int rss;  /* Resident set size, in kB. */
+long long int uptime;   /* ms since started. */
+long long int cputime;  /* ms of CPU used during 'uptime'. */
+pid_t ppid; /* Parent. */
+char name[18];  /* Name (surrounded by parentheses). */
+};
+
 /* Pipe used to signal child termination. */
 static int fds[2];
 
@@ -327,6 +344,178 @@ process_status(const struct process *p)
 return p->status;
 }
 
+int
+count_crashes(pid_t pid)
+{
+char file_name[128];
+const char *paren;
+char line[128];
+int crashes = 0;
+FILE *stream;
+
+ovs_assert(LINUX);
+
+sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid);
+stream = fopen(file_name, "r");
+if (!stream) {
+VLOG_WARN_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno));
+goto exit;
+}
+
+if (!fgets(line, sizeof line, stream)) {
+VLOG_WARN_ONCE("%s: read failed (%s)", file_name,
+   feof(stream) ? "end of file" : ovs_strerror(errno));
+goto exit_close;
+}
+
+paren = strchr(line, '(');
+if (paren) {
+int x;
+if (ovs_scan(paren + 1, "%d", )) {
+crashes = x;
+}
+}
+
+exit_close:
+fclose(stream);
+exit:
+return crashes;
+}
+
+static unsigned long long int
+ticks_to_ms(unsigned long long int ticks)
+{
+ovs_assert(LINUX);
+
+#ifndef USER_HZ
+#define USER_HZ 100
+#endif
+
+#if USER_HZ == 100  /* Common case. */
+return ticks * (1000 / USER_HZ);
+#else  /* Alpha and some other architectures.  */
+double factor = 1000.0 / USER_HZ;
+return ticks * factor + 0.5;
+#endif
+}
+
+static bool
+get_raw_process_info(pid_t pid, struct raw_process_info *raw)
+{
+unsigned long long int vsize, rss, start_time, utime, stime;
+long long int start_msec;
+unsigned long ppid;
+char file_name[128];
+FILE *stream;
+int n;
+
+ovs_assert(LINUX);
+
+sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid);
+stream = fopen(file_name, "r");
+if (!stream) {
+VLOG_ERR_ONCE("%s: open failed (%s)",
+  file_name, ovs_strerror(errno));
+return false;
+}
+
+n = fscanf(stream,
+   "%*d "   /* (1. pid) */
+   "%17s "  /* 2. process name */
+   "%*c "   /* (3. state) */
+   "%lu "   /* 4. ppid */
+   "%*d "   /* (5. pgid) */
+   "%*d "   /* (6. sid) */
+   "%*d "   /* (7. tty_nr) */
+   "%*d "   /* (8. tty_pgrp) */
+   "%*u "   /* (9. flags) */
+   "%*u "   /* (10. min_flt) */
+   "%*u "   /* (11. cmin_flt) */
+   "%*u "   /* (12. maj_flt) */
+   "%*u "   /* (13. cmaj_flt) */
+   "%llu "  /* 14. utime */
+   "%llu "  /* 15. stime */
+   "%*d "   /* (16. 

[ovs-dev] [PATCH] packets: Do not initialize ct_orig_tuple.

2017-06-22 Thread Bhanuprakash Bodireddy
Commit "odp: Support conntrack orig tuple key." introduced new fields
in struct 'pkt_metadata'.  pkt_metadata_init() is called for every
packet in the userspace datapath.  When testing a simple single
flow case with DPDK, we observe a lower throughput after the above
commit (it was 14.88 Mpps before, it is 13 Mpps after).

This patch skips initializing ct_orig_tuple in pkt_metadata_init().
It should be enough to initialize ct_state, because nobody should look
at ct_orig_tuple unless ct_state is != 0.

It's discussed at:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332419.html

Fixes: daf4d3c18da4("odp: Support conntrack orig tuple key.")
Signed-off-by: Daniele Di Proietto <diproiet...@vmware.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Co-authored-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
Original RFC was posted by Daniele here:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-March/329679.html

In this patch the offset is moved from 'ct_orig_tuple' to 'ct_orig_tuple_ipv6'.
This patch fixes the performance drop (~2.3 Mpps for P2P, 64-byte pkts)
with OvS-DPDK on master.

 lib/packets.h | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/lib/packets.h b/lib/packets.h
index a9d5e84..94c3dcc 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -126,10 +126,19 @@ pkt_metadata_init_tnl(struct pkt_metadata *md)
 static inline void
 pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 {
+/* This is called for every packet in userspace datapath and affects
+ * performance if all the metadata is initialized. Hence absolutely
+ * necessary fields should be zeroed out.
+ *
+ * Initialize only the first 17 bytes of metadata (till ct_state).
+ * Once the ct_state is zeroed out rest of ct fields will not be looked
+ * at unless ct_state != 0.
+ */
+memset(md, 0, offsetof(struct pkt_metadata, ct_orig_tuple_ipv6));
+
 /* It can be expensive to zero out all of the tunnel metadata. However,
  * we can just zero out ip_dst and the rest of the data will never be
  * looked at. */
-memset(md, 0, offsetof(struct pkt_metadata, in_port));
 md->tunnel.ip_dst = 0;
 md->tunnel.ipv6_dst = in6addr_any;
 md->in_port.odp_port = port;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v9] netdev-dpdk: Increase pmd thread priority.

2017-06-22 Thread Bhanuprakash Bodireddy
Increase the DPDK pmd thread scheduling priority by lowering the nice
value. This will advise the kernel scheduler to prioritize pmd thread
over other processes and will help PMD to provide deterministic
performance in out-of-the-box deployments.

This patch sets the nice value of PMD threads to '-20'.

  $ ps -eLo comm,policy,psr,nice | grep pmd

   COMMAND  POLICY  PROCESSORNICE
pmd62 TS3-20
pmd63 TS0-20
pmd64 TS1-20
pmd65 TS2-20

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Tested-by: Billy O'Mahony <billy.o.mah...@intel.com>
---
v8->v9:
* Rebase

v7->v8:
* Rebase
* Update the documentation file @Documentation/intro/install/dpdk-advanced.rst

v6->v7:
* Remove realtime scheduling policy logic.
* Increase pmd thread scheduling priority by lowering nice value to -20.
* Update doc accordingly.

v5->v6:
* Prohibit spawning pmd thread on the lowest core in dpdk-lcore-mask if
  lcore-mask and pmd-mask affinity are identical.
* Updated Note section in INSTALL.DPDK-ADVANCED doc.
* Tested below cases to verify system stability with pmd priority patch

v4->v5:
* Reword Note section in DPDK-ADVANCED.md

v3->v4:
* Document update
* Use ovs_strerror for reporting errors in lib-numa.c

v2->v3:
* Move set_priority() function to lib/ovs-numa.c
* Apply realtime scheduling policy and priority to pmd thread only if
  pmd-cpu-mask is passed.
* Update INSTALL.DPDK-ADVANCED.

v1->v2:
* Removed #ifdef and introduced dummy function "pmd_thread_setpriority"
  in netdev-dpdk.h
* Rebase

 Documentation/intro/install/dpdk.rst |  8 +++-
 lib/dpif-netdev.c|  4 
 lib/ovs-numa.c   | 21 +
 lib/ovs-numa.h   |  1 +
 4 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/Documentation/intro/install/dpdk.rst 
b/Documentation/intro/install/dpdk.rst
index e83f852..b5c26ba 100644
--- a/Documentation/intro/install/dpdk.rst
+++ b/Documentation/intro/install/dpdk.rst
@@ -453,7 +453,8 @@ affinitized accordingly.
   to be affinitized to isolated cores for optimum performance.
 
   By setting a bit in the mask, a pmd thread is created and pinned to the
-  corresponding CPU core. e.g. to run a pmd thread on core 2::
+  corresponding CPU core with nice value set to -20.
+  e.g. to run a pmd thread on core 2::
 
   $ ovs-vsctl set Open_vSwitch . other_config:pmd-cpu-mask=0x4
 
@@ -493,6 +494,11 @@ improvements as there will be more total CPU occupancy 
available::
 
 NIC port0 <-> OVS <-> VM <-> OVS <-> NIC port 1
 
+  .. note::
+It is recommended that the OVS control thread and pmd thread shouldn't be
+pinned to the same core i.e 'dpdk-lcore-mask' and 'pmd-cpu-mask' cpu mask
+settings should be non-overlapping.
+
 DPDK Physical Port Rx Queues
 
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f83b632..6bbd786 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3712,6 +3712,10 @@ pmd_thread_main(void *f_)
 ovs_numa_thread_setaffinity_core(pmd->core_id);
 dpdk_set_lcore_id(pmd->core_id);
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
+
+/* Set pmd thread's nice value to -20 */
+#define MIN_NICE -20
+ovs_numa_thread_setpriority(MIN_NICE);
 reload:
 emc_cache_init(>flow_cache);
 
diff --git a/lib/ovs-numa.c b/lib/ovs-numa.c
index 98e97cb..a1921b3 100644
--- a/lib/ovs-numa.c
+++ b/lib/ovs-numa.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #endif /* __linux__ */
@@ -570,3 +571,23 @@ int ovs_numa_thread_setaffinity_core(unsigned core_id 
OVS_UNUSED)
 return EOPNOTSUPP;
 #endif /* __linux__ */
 }
+
+int
+ovs_numa_thread_setpriority(int nice OVS_UNUSED)
+{
+if (dummy_numa) {
+return 0;
+}
+
+#ifndef _WIN32
+int err;
+err = setpriority(PRIO_PROCESS, 0, nice);
+if (err) {
+VLOG_ERR("Thread priority error %s",ovs_strerror(err));
+}
+
+return 0;
+#else
+return EOPNOTSUPP;
+#endif
+}
diff --git a/lib/ovs-numa.h b/lib/ovs-numa.h
index 6946cdc..e132483 100644
--- a/lib/ovs-numa.h
+++ b/lib/ovs-numa.h
@@ -62,6 +62,7 @@ bool ovs_numa_dump_contains_core(const struct ovs_numa_dump *,
 size_t ovs_numa_dump_count(const struct ovs_numa_dump *);
 void ovs_numa_dump_destroy(struct ovs_numa_dump *);
 int ovs_numa_thread_setaffinity_core(unsigned core_id);
+int ovs_numa_thread_setpriority(int nice);
 
 #define FOR_EACH_CORE_ON_DUMP(ITER, DUMP)\
 HMAP_FOR_EACH((ITER), hmap_node, &(DUMP)->cores)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/6] process: Consolidate process related APIs.

2017-06-19 Thread Bhanuprakash Bodireddy
As part of retrieving system statistics, process status APIs along with
helper functions were implemented. Some of them are very generic and can
be reused by other subsystems.

Move the APIs in system-stats.c to process.c and util.c and make them
available. This patch doesn't change any functionality.

CC: Ben Pfaff <b...@ovn.org>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c   | 172 +
 lib/process.h   |  12 +++
 lib/util.c  |  85 
 lib/util.h  |   4 +
 vswitchd/system-stats.c | 251 +---
 5 files changed, 274 insertions(+), 250 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index e9d0ba9..235c9de 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -33,6 +33,7 @@
 #include "poll-loop.h"
 #include "signals.h"
 #include "socket-util.h"
+#include "timeval.h"
 #include "util.h"
 #include "openvswitch/vlog.h"
 
@@ -40,6 +41,13 @@ VLOG_DEFINE_THIS_MODULE(process);
 
 COVERAGE_DEFINE(process_start);
 
+#ifdef __linux__
+#define LINUX 1
+#include 
+#else
+#define LINUX 0
+#endif
+
 struct process {
 struct ovs_list node;
 char *name;
@@ -50,6 +58,15 @@ struct process {
 int status;
 };
 
+struct raw_process_info {
+unsigned long int vsz;  /* Virtual size, in kB. */
+unsigned long int rss;  /* Resident set size, in kB. */
+long long int uptime;   /* ms since started. */
+long long int cputime;  /* ms of CPU used during 'uptime'. */
+pid_t ppid; /* Parent. */
+char name[18];  /* Name (surrounded by parentheses). */
+};
+
 /* Pipe used to signal child termination. */
 static int fds[2];
 
@@ -327,6 +344,161 @@ process_status(const struct process *p)
 return p->status;
 }
 
+int
+count_crashes(pid_t pid)
+{
+char file_name[128];
+const char *paren;
+char line[128];
+int crashes = 0;
+FILE *stream;
+
+ovs_assert(LINUX);
+
+sprintf(file_name, "/proc/%lu/cmdline", (unsigned long int) pid);
+stream = fopen(file_name, "r");
+if (!stream) {
+VLOG_WARN_ONCE("%s: open failed (%s)", file_name, ovs_strerror(errno));
+goto exit;
+}
+
+if (!fgets(line, sizeof line, stream)) {
+VLOG_WARN_ONCE("%s: read failed (%s)", file_name,
+   feof(stream) ? "end of file" : ovs_strerror(errno));
+goto exit_close;
+}
+
+paren = strchr(line, '(');
+if (paren) {
+int x;
+if (ovs_scan(paren + 1, "%d", )) {
+crashes = x;
+}
+}
+
+exit_close:
+fclose(stream);
+exit:
+return crashes;
+}
+
+static bool
+get_raw_process_info(pid_t pid, struct raw_process_info *raw)
+{
+unsigned long long int vsize, rss, start_time, utime, stime;
+long long int start_msec;
+unsigned long ppid;
+char file_name[128];
+FILE *stream;
+int n;
+
+ovs_assert(LINUX);
+
+sprintf(file_name, "/proc/%lu/stat", (unsigned long int) pid);
+stream = fopen(file_name, "r");
+if (!stream) {
+VLOG_ERR_ONCE("%s: open failed (%s)",
+  file_name, ovs_strerror(errno));
+return false;
+}
+
+n = fscanf(stream,
+   "%*d "   /* (1. pid) */
+   "%17s "  /* 2. process name */
+   "%*c "   /* (3. state) */
+   "%lu "   /* 4. ppid */
+   "%*d "   /* (5. pgid) */
+   "%*d "   /* (6. sid) */
+   "%*d "   /* (7. tty_nr) */
+   "%*d "   /* (8. tty_pgrp) */
+   "%*u "   /* (9. flags) */
+   "%*u "   /* (10. min_flt) */
+   "%*u "   /* (11. cmin_flt) */
+   "%*u "   /* (12. maj_flt) */
+   "%*u "   /* (13. cmaj_flt) */
+   "%llu "  /* 14. utime */
+   "%llu "  /* 15. stime */
+   "%*d "   /* (16. cutime) */
+   "%*d "   /* (17. cstime) */
+   "%*d "   /* (18. priority) */
+   "%*d "   /* (19. nice) */
+   "%*d "   /* (20. num_threads) */
+   "%*d "   /* (21. always 0) */
+   "%llu "  /* 22. start_time */
+   "%llu "  /* 23. vsize */
+   "%llu "  /* 24. rss */
+#if 0
+   /* These are here for documentation but #

[ovs-dev] [PATCH 3/6] dpctl: Skip invoking qsort on empty list

2017-06-19 Thread Bhanuprakash Bodireddy
Clang reports "Argument with 'nonnull' attribute passed null" warning.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpctl.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/dpctl.c b/lib/dpctl.c
index 7f44d02..2ad475b 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -555,7 +555,9 @@ show_dpif(struct dpif *dpif, struct dpctl_params *dpctl_p)
 n_port_nos++;
 }
 
-qsort(port_nos, n_port_nos, sizeof *port_nos, compare_port_nos);
+if (port_nos) {
+qsort(port_nos, n_port_nos, sizeof *port_nos, compare_port_nos);
+}
 
 for (int i = 0; i < n_port_nos; i++) {
 if (dpif_port_query_by_number(dpif, port_nos[i], _port)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/6] dpif-netlink-rtnl: Fix dead store reported by clang.

2017-06-19 Thread Bhanuprakash Bodireddy
Clang reports that the variable 'ifmsg' is never used in the function.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netlink-rtnl.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/lib/dpif-netlink-rtnl.c b/lib/dpif-netlink-rtnl.c
index c3c31eb..17ae24a 100644
--- a/lib/dpif-netlink-rtnl.c
+++ b/lib/dpif-netlink-rtnl.c
@@ -140,11 +140,9 @@ rtnl_policy_parse(const char *kind, struct ofpbuf *reply,
 {
 struct nlattr *linkinfo[ARRAY_SIZE(linkinfo_policy)];
 struct nlattr *rtlink[ARRAY_SIZE(rtlink_policy)];
-struct ifinfomsg *ifmsg;
 int error = 0;
 
-ifmsg = ofpbuf_at(reply, NLMSG_HDRLEN, sizeof *ifmsg);
-if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof *ifmsg,
+if (!nl_policy_parse(reply, NLMSG_HDRLEN + sizeof(struct ifinfomsg),
  rtlink_policy, rtlink, ARRAY_SIZE(rtlink_policy))
 || !nl_parse_nested(rtlink[IFLA_LINKINFO], linkinfo_policy,
 linkinfo, ARRAY_SIZE(linkinfo_policy))
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/6] dpif-netdev: Skip invoking qsort on empty list.

2017-06-19 Thread Bhanuprakash Bodireddy
sorted_poll_list() returns the sorted list of rxqs mapped to PMD thread
along with the rxq count. Skip sorting the list if there are no rxqs
mapped to the PMD thread. This can be reproduced with manual pinning and
'dpif-netdev/pmd-rxq-show' command.

Clang also reports that a null argument is passed to qsort in this case.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc7..4b5b23b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -884,10 +884,9 @@ sorted_poll_list(struct dp_netdev_pmd_thread *pmd, struct 
rxq_poll **list,
 i++;
 }
 ovs_assert(i == *n);
+qsort(ret, *n, sizeof *ret, compare_poll_list);
 }
 
-qsort(ret, *n, sizeof *ret, compare_poll_list);
-
 *list = ret;
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/6] netdev: Fix null pointer dereference reported by clang.

2017-06-19 Thread Bhanuprakash Bodireddy
Clang reports that array access through the 'dumps' variable results in a
null pointer dereference.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev.c | 16 +---
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/lib/netdev.c b/lib/netdev.c
index 001b7b3..336c141 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -2290,14 +2290,16 @@ netdev_ports_flow_dump_create(const void *obj, int 
*ports)
 
 dumps = count ? xzalloc(sizeof *dumps * count) : NULL;
 
-HMAP_FOR_EACH(data, node, _to_netdev) {
-if (data->obj == obj) {
-if (netdev_flow_dump_create(data->netdev, [i])) {
-continue;
-}
+if (dumps) {
+HMAP_FOR_EACH(data, node, _to_netdev) {
+if (data->obj == obj) {
+if (netdev_flow_dump_create(data->netdev, [i])) {
+continue;
+}
 
-dumps[i]->port = data->dpif_port.port_no;
-i++;
+dumps[i]->port = data->dpif_port.port_no;
+i++;
+}
 }
 }
 ovs_mutex_unlock(_hmap_mutex);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/6] test-conntrack: Fix dead store reported by clang.

2017-06-19 Thread Bhanuprakash Bodireddy
Clang reports that the value stored to 'batch_size' is never read.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 tests/test-conntrack.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test-conntrack.c b/tests/test-conntrack.c
index f79a9fc..5d2f8b8 100644
--- a/tests/test-conntrack.c
+++ b/tests/test-conntrack.c
@@ -197,7 +197,6 @@ test_pcap(struct ovs_cmdl_context *ctx)
 return;
 }
 
-batch_size = 1;
 if (ctx->argc > 2) {
 batch_size = strtoul(ctx->argv[2], NULL, 0);
 if (batch_size == 0 || batch_size > NETDEV_MAX_BURST) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 01/21] vswitch.xml: Add keepalive support.

2017-06-07 Thread Bhanuprakash Bodireddy
Add support for keepalive functionality. By default, the keepalive is off and
can be enabled/disabled only at init time.

For example:
  To enable the keepalive feature:
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set a timer interval of 500ms for monitoring packet processing cores:
  'ovs-vsctl --no-wait set Open_vSwitch . \
 other_config:keepalive-interval="500"'

  To set the name of the shared memory block where the events shall be updated:
  'ovs-vsctl --no-wait set Open_vSwitch . \
 other_config:keepalive-shm-name="/ovs_keepalive_shm_name"'

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 vswitchd/vswitch.xml | 39 +++
 1 file changed, 39 insertions(+)

diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 892f839..59c96df 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -531,6 +531,45 @@
   
 
   
+
+  
+
+  The keepalive column contains key-value pairs that
+  report health of datapath cores in Open vSwitch.  These are updated
+  periodically (based on the keepalive-interval).
+
+
+
+  Keepalive is disabled by default to avoid overhead in the common
+  case when heartbeat monitoring is not useful.  Set this value to
+  true to enable keepalive 
+  column or to false to explicitly disable it.
+
+
+
+  
+Specifies the keepalive interval value.
+  
+  
+If not specified, this will be set to 100 milliseconds (default
+value). Changing this value requires restarting the daemon.
+  
+
+
+
+  
+Specifies the keepalive shared memory block name.
+  
+  
+If not specified, shared memory block named "keepalive_shm_name"
+(default name) is created. Changing this value requires restarting
+the daemon.
+  
+
+  
 
 
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 06/21] bridge: Invoke keepalive framework.

2017-06-07 Thread Bhanuprakash Bodireddy
With this commit the keepalive framework is invoked when enable-keepalive
is set to 'true' in ovsdb. As part of initialization the shared memory block
is created and initialized. Also the keepalive data structure is initialized
and the callback functions are registered if DPDK datapath is enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 vswitchd/bridge.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index cc7a43b..d40879d 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -34,6 +34,7 @@
 #include "hmapx.h"
 #include "if-notifier.h"
 #include "jsonrpc.h"
+#include "keepalive.h"
 #include "lacp.h"
 #include "mac-learning.h"
 #include "mcast-snooping.h"
@@ -2958,6 +2959,9 @@ bridge_run(void)
 dpdk_init(>other_config);
 }
 
+/* Initialize Keepalive framework */
+ka_init(>other_config);
+
 /* Initialize the ofproto library.  This only needs to run once, but
  * it must be done after the configuration is set.  If the
  * initialization has already occurred, bridge_init_ofproto()
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 05/21] Keepalive: Add initial keepalive support.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/automake.mk   |   2 +
 lib/dpdk.c|  23 
 lib/dpdk.h|   1 +
 lib/keepalive.c   | 163 ++
 lib/keepalive.h   |  59 
 lib/netdev-dpdk.c |  80 ++-
 lib/netdev-dpdk.h |   5 ++
 7 files changed, 332 insertions(+), 1 deletion(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index f5baba2..1b05221 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 9c764b9..3f5669b 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -32,6 +32,7 @@
 
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "keepalive.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
@@ -477,6 +478,28 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 }
 
+int
+dpdk_ka_init(void)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR("SHM uninitialized? keepalive initialization aborted.");
+return -1;
+}
+
+/* Initialize keepalive subsystem */
+if ((rte_global_keepalive_info =
+rte_keepalive_create(_failcore_cb, ka_shm)) == NULL) {
+VLOG_ERR("Keepalive initialization failed.");
+return -1;
+} else {
+rte_keepalive_register_relay_callback(rte_global_keepalive_info,
+dpdk_ka_update_core_state, ka_shm);
+}
+
+return 0;
+}
+
 const char *
 dpdk_get_vhost_sock_dir(void)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index bdbb51b..dc830c4 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -37,6 +37,7 @@ struct smap;
 
 struct rte_keepalive *rte_global_keepalive_info;
 void dpdk_init(const struct smap *ovs_other_config);
+int dpdk_ka_init(void);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..0de6f49
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "dpdk.h"
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;   /* KeepAlive disabled by default */
+static bool ka_init_status = ka_init_failure; /* KeepAlive initialization */
+static uint32_t keepalive_timer_interval; /* keepalive timer interval */
+
+static const char *keepalive_shm_blk = NULL;
+struct keepalive_shm *ka_shm = NULL;
+
+/* Return the Keepalive shared memory block name. */
+static inline const char *
+get_ka_shm_blk(void)
+{
+return keepalive_shm_blk;
+}
+
+inline struct keepalive_shm *
+get_ka_shm(void)
+{
+return ka_shm;
+}
+
+/* Retrieve and return the keepalive timer interval from OVSDB. */
+static uint32_t
+get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
+{
+#define OVS_KEEPALIVE_TIMEOUT 100/* Default timeout set to 100ms */
+uint32_t ka_interval;
+
+/* Timer granularity in milliseconds
+ * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */
+ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
+  OVS_KEEPALIVE_TIMEOUT);
+
+VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
+return ka_interval;
+}
+
+static const char *
+get_ka_shm_block(const struct smap *ovs_other_config OVS_UNUSED)
+{
+/* Shared mem block. */
+#define OVS_KEEPALIVE_SHM_NAME /dpdk_keepalive_shm_name
+keepalive_shm_blk = smap_get(ovs_other_config, "keepalive-shm-name");
+if (!keepalive_shm_blk) {
+keepalive_shm_b

[ovs-dev] [RFC PATCH 16/21] keepalive: Check the link status as part of PMD health checks.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds initial support for performing PMD health checks.
The link status of the ports handled by the PMD threads is checked and
the result is written to the SHM block.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 41 --
 lib/keepalive.c   | 76 +++
 lib/keepalive.h   | 33 
 3 files changed, 148 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b2f0611..b7689e3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -72,6 +72,7 @@
 #include "seq.h"
 #include "smap.h"
 #include "sset.h"
+#include "svec.h"
 #include "timeval.h"
 #include "tnl-neigh-cache.h"
 #include "tnl-ports.h"
@@ -971,9 +972,45 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void
-pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-/* Nothing */
+int port_link_status = 0;
+struct rxq_poll *poll;
+
+struct svec pmd_poll_list;
+svec_init(_poll_list);
+HMAP_FOR_EACH (poll, node, >poll_list) {
+svec_add(_poll_list, netdev_rxq_get_name(poll->rxq->rx));
+}
+
+/* With MQ enabled, remove the duplicates. */
+svec_sort_unique(_poll_list);
+
+const char *port_name;
+int i = 0;
+SVEC_FOR_EACH(i, port_name, _poll_list) {
+HMAP_FOR_EACH (poll, node, >poll_list) {
+if (!strcmp(port_name, netdev_rxq_get_name(poll->rxq->rx))) {
+char *link_state;
+link_state =
+netdev_get_carrier(poll->rxq->port->netdev) ? "up" : 
"down";
+
+ka_shm_update_port_status(netdev_rxq_get_name(poll->rxq->rx),
+  netdev_rxq_get_queue_id(poll->rxq->rx),
+  link_state, pmd->core_id, i);
+break;
+}
+}
+}
+svec_destroy(_poll_list);
+
+port_link_status = ka_get_polled_ports_status(pmd->core_id);
+
+if (port_link_status == ACTIVE_RUN_STATE) {
+ka_set_pmd_state(pmd->core_id, KA_STATE_ALIVE);
+} else {
+ka_set_pmd_state(pmd->core_id, KA_STATE_CHECK);
+}
 }
 
 static void
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 9fa9ad9..d1858ac 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -148,6 +148,17 @@ ka_is_pmdhealth_check_needed(unsigned core_id)
 return ka_pmd_core_health[core_id];
 }
 
+void
+ka_set_pmd_state(unsigned core_id, enum keepalive_state state)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+}
+
+ka_shm->core_state[core_id] = state;
+}
+
 /* Retrieve and return the keepalive timer interval from OVSDB. */
 static uint32_t
 get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
@@ -310,6 +321,71 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return -1;
+}
+
+int failed = 0;
+int n_ports = ka_shm->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_shm->ext_stats[core_id].port_stats[i].state[PORT_STATUS_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
+void
+ka_shm_update_port_status(const char *port, int qid, char *link_state,
+  int core_id, int idx)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return;
+}
+
+/* XXX */
+if (idx >= MAX_POLL_PORTS) {
+VLOG_ERR_RL(, "KeepAlive: Max poll ports per PMD thread reached.");
+return;
+}
+
+ka_shm->ext_stats[core_id].num_poll_ports = idx;
+
+if (core_id != NON_PMD_CORE_ID) {
+ka_shm->ext_stats[core_id].port_stats[idx].port = port;
+ka_shm->ext_stats[core_id].port_stats[idx].qid = qid;
+ka_shm->ext_stats[core_id].port_stats[idx].link_state =
+   link_state;
+} else {
+VLOG_ERR_RL(, "KeepAlive: Invalid pmd core_id.");
+}
+
+int state;
+if (!strcmp(link_state, "down")) {
+state = FAILURE_STATE;
+} else {
+state = ACTIVE_RUN_STATE;
+}
+
+   

[ovs-dev] [RFC PATCH 11/21] bridge: Update keepalive status in ovsdb

2017-06-07 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the ovsdb with the
datapath status along with the status for all registered PMD threads.
The status can be monitored using ovsdb-client.

  $ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

rowaction keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  { "CORE_0"="ALIVE,9226457935188922"
"CORE_1"="ALIVE,9226457935189628"
"CORE_2"="ALIVE,9226457935189897"
"CORE_3"="ALIVE,9226457935190127"
"Datapath status"=HEALTHY}

 new  { "CORE_0"="ALIVE,9226460230167364"
"CORE_1"="ALIVE,9226460230168100"
"CORE_2"="ALIVE,9226460230168905"
        "CORE_3"="ALIVE,9226460230169632"
"Datapath status"=HEALTHY}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 239c666..acb4dd2 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -256,6 +256,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index bdee16e..1d37050 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -65,5 +65,6 @@ bool is_ka_enabled(void);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 
 #endif /* keepalive.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index d40879d..ef62b6c 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2689,6 +2691,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3037,6 +3062,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 12/21] keepalive: Add support to query keepalive statistics.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive statistics stored in
posix shared memory block. Datapath health status can be retrieved as
follows:

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status  : Enabled
keepalive interval: 1000 ms

CORESTATE   LAST SEEN TIMESTAMP
 0  ALIVE   8632183482028293
 1  ALIVE   8632183482028425
 2  ALIVE   8632190191004294
 3  ALIVE   8632183482028525
 4  GONE8612183482028117
 5  ALIVE   8632190191004984
 6  ALIVE   8632190191005713
 7  ALIVE   8632190191006555

Datapath status   : BAD

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 70 +
 1 file changed, 70 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index acb4dd2..3048527 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,8 +24,10 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
@@ -271,6 +273,71 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_shm_)
+{
+struct keepalive_shm *ka_shm = (struct keepalive_shm *)ka_shm_;
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block\n");
+return;
+}
+
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\t\tKeepalive status\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  is_ka_enabled() ? "Enabled" : "Disabled");
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+ds_put_format(,
+  "\nCORE\tSTATE\tLAST SEEN TIMESTAMP\n");
+
+int datapath_failure = 0;
+for (int idx_core = 0; idx_core < KEEPALIVE_MAXCORES; idx_core++) {
+char *state = NULL;
+if (ka_shm->core_state[idx_core] == KA_STATE_UNUSED ||
+ka_shm->core_state[idx_core] == KA_STATE_SLEEP)
+continue;
+
+switch (ka_shm->core_state[idx_core]) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+datapath_failure++;
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+ds_put_format(, "%2d\t%s\t%"PRIu64"\n",
+  idx_core, state, ka_shm->core_last_seen_times[idx_core]);
+}
+
+ds_put_format(, "\n");
+ds_put_format(, "Datapath Status   : %s\n",
+  datapath_failure ? "BAD" : "HEALTHY");
+
+unixctl_command_reply(conn, ds_cstr());
+}
+
 static int
 ka_init__(void)
 {
@@ -306,6 +373,9 @@ ka_init(const struct smap *ovs_other_config)
 VLOG_INFO("OvS Keepalive - initialized.");
 ka_init_status = ka_init_success;
 }
+
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, ka_shm);
 } else {
 VLOG_ERR("keepalive_shm_create() failed.");
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 20/21] keepalive: Display extended Keepalive status.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds support to display the extended keepalive statistics
stored in SHM block. The status can be displayed as follows.

  $ ovs-appctl keepalive/pmd-xstats-show

  keepAlive Status  : Enabled
  keepAlive Interval: 1000 ms

  CORE: 0
  PMD thread id   : 1269 [ACTIVE]
  PMD heartbeats  : enabled
  PMD thread state: ALIVE
  Last seen timestamp : 9123706507798853
  PMD failure cnt : 0

  CORE: 1
  PMD thread id   : 1270 [ACTIVE]
  PMD heartbeats  : enabled
  PMD thread state: ALIVE
  Last seen timestamp : 9123706507801627
  PMD failure cnt : 0

  CORE: 2
  PMD thread id   : 1271 [ACTIVE]
  PMD heartbeats  : enabled
  PMD thread state: ALIVE
  Last seen timestamp : 9125112827794550
  PMD failure cnt : 0
  PMD health check: enabled
  Packet Stats
  Port dpdk0, Queue: 1, Link status: up
  rx_packets : 1801284454
  tx_packets : 0
  Cycle Stats
  Polling cycles : 35426111637
  Processing cycles : 10123697085

  Datapath status   : HEALTHY

For PMD on core 2, on a heartbeat failure, health checks are enabled
and additional stats(pkt stats, cpu cycles) are displayed as above.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 124 
 1 file changed, 124 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3b00d01..15ca400 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -28,6 +28,7 @@
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
+#include "process.h"
 #include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
@@ -625,6 +626,126 @@ ka_unixctl_status(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 ds_destroy();
 }
 
+static void
+ka_unixctl_pmd_xstats_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_shm_)
+{
+struct keepalive_shm *ka_shm = (struct keepalive_shm *)ka_shm_;
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block\n");
+return;
+}
+
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\t\tKeepalive xstats\n");
+
+ds_put_format(, "keepAlive Status: %s\n",
+  is_ka_enabled() ? "Enabled" : "Disabled");
+ds_put_format(, "keepAlive Interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+int datapath_failure = 0;
+for (int idx_core = 0; idx_core < KEEPALIVE_MAXCORES; idx_core++) {
+char *state = NULL;
+if (ka_shm->core_state[idx_core] == KA_STATE_UNUSED ||
+ ka_shm->core_state[idx_core] == KA_STATE_SLEEP)
+continue;
+
+switch (ka_shm->core_state[idx_core]) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+datapath_failure++;
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+ds_put_format(, "\n");
+ds_put_format(, "CORE: %d\n", idx_core);
+
+int pstate;
+int err = get_process_status(ka_shm->thread_id[idx_core], );
+char *tid_state = NULL;
+if (!err) {
+switch (pstate) {
+case ACTIVE_STATE:
+tid_state = "ACTIVE";
+break;
+case STOPPED_STATE:
+case TRACED_STATE:
+case DEFUNC_STATE:
+case UNINTERRUPTIBLE_SLEEP_STATE:
+tid_state = "INACTIVE";
+break;
+}
+} else {
+tid_state = "UNKNOWN";
+}
+
+ds_put_format(, "\tPMD thread-id   : %d [%s]\n",
+  ka_shm->thread_id[idx_core], tid_state);
+ds_put_format(, "\tPMD heartbeats  : %s\n",
+  is_ka_enabled() ? "enabled" : "disabled");
+ds_put_format(, "\tPMD thread state: %s\n", state);
+ds_put_format(, "\tLast seen timestamp : %"PRIu64"\n",
+  ka_shm->core_last_seen_times[idx_core])

[ovs-dev] [RFC PATCH 10/21] keepalive: Retrieve PMD status periodically.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: CORE_<id>="STATUS, last_seen_timestamp"
  eg: CORE_1="ALIVE,9220698256784207"
  CORE_2="GONE,9220698256786231"

The status is periodically retrieved by the keepalive thread and stored in the
keepalive_stats struct, which is later retrieved by the vswitchd thread.

Also the datapath status is updated by considering the status of all
active PMD threads. In case of four PMD threads the status is as below:

   "CORE_0="ALIVE,9220698256784207"
CORE_1="ALIVE,9220698256784913"
CORE_2="ALIVE,9220698256785902"
CORE_3="ALIVE,9220698256786231"
Datapath status:HEALTHY"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 81 +++
 lib/keepalive.h   |  1 +
 3 files changed, 83 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 97c3807..c76c74c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -981,6 +981,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 ovsrcu_quiesce_start();
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 33ddd00..239c666 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -25,8 +25,10 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
 static bool keepalive_enable = false;   /* KeepAlive disabled by default */
 static bool ka_init_status = ka_init_failure; /* KeepAlive initialization */
@@ -35,6 +37,9 @@ static uint32_t keepalive_timer_interval; /* keepalive 
timer interval */
 static const char *keepalive_shm_blk = NULL;
 struct keepalive_shm *ka_shm = NULL;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 /* Return the Keepalive shared memory block name. */
 static inline const char *
 get_ka_shm_blk(void)
@@ -175,6 +180,82 @@ struct keepalive_shm *keepalive_shm_create(void)
 return NULL;
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+{
+struct keepalive_shm *ka_shm = get_ka_shm();
+if (!ka_shm) {
+VLOG_ERR_RL(, "KeepAlive: Invalid shared memory block.");
+return;
+}
+
+int datapath_failure = 0;
+int n_cores = count_cpu_cores();
+for (int core_id = 0; core_id < n_cores; core_id++) {
+char *state = NULL;
+char *core_id_str;
+
+if (ka_shm->core_state[core_id] == KA_STATE_UNUSED ||
+ka_shm->core_state[core_id] == KA_STATE_SLEEP ) {
+continue;
+}
+
+switch (ka_shm->core_state[core_id]) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+datapath_failure++;
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+core_id_str = xasprintf("CORE_%d", core_id);
+smap_add_format(ka_pmd_stats, core_id_str, "%s,%ld",
+state, ka_shm->core_last_seen_times[core_id]);
+free(core_id_str);
+}
+
+smap_add_format(ka_pmd_stats, "Datapath status", "%s",
+   datapath_failure ? "BAD" : "HEALTHY");
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+get_pmd_status(ka_pmd_stats);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 920e934..bdee16e 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -64,5 +64,6 @@ void ka_mark_pmd_thread_sleep(void);
 bool is_ka_enabled(void);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
+void get_ka_stats(void);
 
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 07/21] keepalive: Add more helper functions to KA framework.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit introduces helper functions in 'keepalive' module that are
needed to register/unregister PMD threads to KA framework. Also
introduce APIs to mark the PMD core states.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 56 
 lib/keepalive.h |  9 +
 2 files changed, 65 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 0de6f49..33ddd00 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -48,6 +48,62 @@ get_ka_shm(void)
 return ka_shm;
 }
 
+/* Returns true if keepalive is enabled, otherwise false.
+ *
+ * Not marked 'inline': the function is exported through keepalive.h, so the
+ * specifier cannot help callers in other files. */
+bool
+is_ka_enabled(void)
+{
+    return keepalive_enable;
+}
+
+/* Returns the keepalive timer interval, as stored in
+ * 'keepalive_timer_interval'.
+ *
+ * Not marked 'inline': the function is exported through keepalive.h, so the
+ * specifier cannot help callers in other files. */
+uint32_t
+get_ka_interval(void)
+{
+    return keepalive_timer_interval;
+}
+
+/* Returns the keepalive initialization status, as stored in
+ * 'ka_init_status'.
+ *
+ * Not marked 'inline': the function is exported through keepalive.h, so the
+ * specifier cannot help callers in other files. */
+int
+get_ka_init_status(void)
+{
+    return ka_init_status;
+}
+
+/* Hands packet processing core 'core_id' to dpdk_register_pmd_core() so that
+ * it takes part in liveness checks.  Does nothing while keepalive is
+ * disabled. */
+void
+ka_register_pmd_thread(unsigned core_id)
+{
+    if (!is_ka_enabled()) {
+        return;
+    }
+
+    dpdk_register_pmd_core(core_id);
+}
+
+/* Unregisters packet processing core 'core_id' from liveness checks by
+ * handing it to dpdk_unregister_pmd_core().  Does nothing while keepalive is
+ * disabled. */
+void
+ka_unregister_pmd_thread(unsigned core_id)
+{
+    if (!is_ka_enabled()) {
+        return;
+    }
+
+    dpdk_unregister_pmd_core(core_id);
+}
+
+/* Marks the packet processing core alive through dpdk_mark_pmd_core_alive().
+ * Does nothing while keepalive is disabled.
+ *
+ * Not marked 'inline': the function is exported through keepalive.h, so the
+ * specifier cannot help callers in other files. */
+void
+ka_mark_pmd_thread_alive(void)
+{
+    if (is_ka_enabled()) {
+        dpdk_mark_pmd_core_alive();
+    }
+}
+
+/* Marks the packet processing core as idle through
+ * dpdk_mark_pmd_core_sleep().  Does nothing while keepalive is disabled.
+ *
+ * Not marked 'inline': the function is exported through keepalive.h, so the
+ * specifier cannot help callers in other files. */
+void
+ka_mark_pmd_thread_sleep(void)
+{
+    if (is_ka_enabled()) {
+        dpdk_mark_pmd_core_sleep();
+    }
+}
+
 /* Retrieve and return the keepalive timer interval from OVSDB. */
 static uint32_t
 get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 15407b7..920e934 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -56,4 +56,13 @@ enum keepalive_status {
 void ka_init(const struct smap *);
 struct keepalive_shm *get_ka_shm(void);
 
+void ka_register_pmd_thread(unsigned);
+void ka_unregister_pmd_thread(unsigned);
+void ka_mark_pmd_thread_alive(void);
+void ka_mark_pmd_thread_sleep(void);
+
+bool is_ka_enabled(void);
+uint32_t get_ka_interval(void);
+int get_ka_init_status(void);
+
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 14/21] dpif-netdev: Add helper function to check false positives.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds an API to store the PMD thread id in SHM block.
The tid later shall be retrieved by callback function that gets invoked to
check for false positives.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 13 +
 lib/keepalive.h   |  1 +
 3 files changed, 15 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c76c74c..6ac1bd3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3778,6 +3778,7 @@ pmd_thread_main(void *f_)
 ovs_numa_thread_setaffinity_core(pmd->core_id);
 dpdk_set_lcore_id(pmd->core_id);
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
+ka_get_tid(pmd->core_id);
 reload:
 emc_cache_init(>flow_cache);
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index d74b4ab..da830ab 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -75,6 +75,19 @@ get_ka_init_status(void)
 return ka_init_status;
 }
 
+/* Stores the calling thread's id in the keepalive shared memory slot of core
+ * 'core_idx'.  Despite its name, this function records the tid rather than
+ * returning it.
+ *
+ * Does nothing when keepalive is disabled, when the shared memory block is
+ * not available, or when 'core_idx' is outside the 'thread_id' array.  When
+ * built without DPDK_NETDEV the stored tid is 0. */
+void
+ka_get_tid(unsigned core_idx)
+{
+    uint32_t tid = 0;
+
+    if (!is_ka_enabled() || !ka_shm || core_idx >= KEEPALIVE_MAXCORES) {
+        return;
+    }
+
+#ifdef DPDK_NETDEV
+    tid = rte_sys_gettid();
+#endif
+
+    ka_shm->thread_id[core_idx] = tid;
+}
+
 /* Register packet processing core 'core_id' for liveness checks. */
 void
 ka_register_pmd_thread(unsigned core_id)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 1d37050..63f35f0 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -61,6 +61,7 @@ void ka_unregister_pmd_thread(unsigned);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
 
+void ka_get_tid(unsigned core);
 bool is_ka_enabled(void);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 04/21] process: Retrieve process status.

2017-06-07 Thread Bhanuprakash Bodireddy
Implement helper function to retrieve the process status. This will be used
by keepalive monitoring thread to detect false alarms and to show PMD
thread state in future commits.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 60 +++
 lib/process.h | 10 ++
 2 files changed, 70 insertions(+)

diff --git a/lib/process.c b/lib/process.c
index e9d0ba9..3735cf5 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -50,6 +50,20 @@ struct process {
 int status;
 };
 
+/* One row of 'pstate_map': a state code as it appears on the "State:" line of
+ * /proc/TID/status, and the 'enum process_states' value it stands for. */
+struct pstate2Num {
+    const char *tidState;   /* State code; NULL in the terminating row. */
+    int num;                /* Matching 'enum process_states' value. */
+};
+
+/* Lookup table walked by get_process_status().  The walk stops at the first
+ * row whose 'tidState' is a null pointer, so the last row must use NULL
+ * itself and not the string "NULL". */
+const struct pstate2Num pstate_map[] = {
+    { "S", STOPPED_STATE },
+    { "R", ACTIVE_STATE },
+    { "t", TRACED_STATE },
+    { "Z", DEFUNC_STATE },
+    { "D", UNINTERRUPTIBLE_SLEEP_STATE },
+    { NULL, UNUSED_STATE },
+};
+
 /* Pipe used to signal child termination. */
 static int fds[2];
 
@@ -390,6 +404,52 @@ process_run(void)
 #endif
 }
 
+/* Reads the state of thread 'tid' from "/proc/TID/status" and, when the state
+ * code is listed in 'pstate_map', stores the matching value in '*pstate'.
+ * '*pstate' is left untouched when the "State:" line is missing or carries a
+ * code that is not in the table.
+ *
+ * Returns 0 if the status file could be opened, the errno value of the
+ * failed fopen() otherwise, or ENOSYS on platforms other than Linux. */
+int
+get_process_status(int tid, int *pstate)
+{
+#ifdef __linux__
+    /* Large enough for "/proc/", any 'int' printed with %d, "/status" and
+     * the terminator.  Kept on the stack so that concurrent callers do not
+     * share it. */
+    char process_name[32];
+    char line[75];
+    char Name[15], value[5], status[20];
+    FILE *stream;
+    int ln;
+
+    snprintf(process_name, sizeof process_name, "/proc/%d/status", tid);
+    stream = fopen(process_name, "r");
+    if (stream == NULL) {
+        /* Save errno before logging, which may overwrite it. */
+        int error = errno;
+
+        VLOG_WARN_ONCE("%s: open failed: %s", process_name,
+                       ovs_strerror(error));
+        return error;
+    }
+
+    /* 'ln' counts every line read, including the ones that fail to parse. */
+    for (ln = 0; fgets(line, sizeof line, stream); ln++) {
+        if (!ovs_scan(line, "%6s %2s %14s\n", Name, value, status)) {
+            VLOG_WARN_ONCE("%s: could not parse line %d: %s",
+                           process_name, ln, line);
+            continue;
+        }
+        if (strcmp(Name, "State:")) {
+            continue;
+        }
+
+        /* Bound the walk by the table size as well as by its terminator, so
+         * that it can never run past the end of 'pstate_map'. */
+        for (size_t i = 0;
+             i < sizeof pstate_map / sizeof pstate_map[0]
+             && pstate_map[i].tidState != NULL;
+             i++) {
+            if (!strcmp(pstate_map[i].tidState, value)) {
+                VLOG_WARN_ONCE("The state is %s, status is %d\n",
+                               pstate_map[i].tidState, pstate_map[i].num);
+                *pstate = pstate_map[i].num;
+                break;
+            }
+        }
+        break;
+    }
+
+    fclose(stream);
+    return 0;
+#else
+    return ENOSYS;
+#endif
+}
 
 /* Causes the next call to poll_block() to wake up when process 'p' has
  * exited. */
diff --git a/lib/process.h b/lib/process.h
index 3feac7e..8a5513e 100644
--- a/lib/process.h
+++ b/lib/process.h
@@ -20,6 +20,15 @@
 #include 
 #include 
 
+enum process_states { /* Values that pstate_map pairs with state codes. */
+UNUSED_STATE, /* Paired with the last entry of pstate_map. */
+STOPPED_STATE, /* Paired with "S" in pstate_map.  NOTE(review): proc(5) lists
+ * "S" as sleeping and "T" as stopped -- confirm the intended mapping. */
+ACTIVE_STATE, /* Paired with "R" in pstate_map. */
+TRACED_STATE, /* Paired with "t" in pstate_map. */
+DEFUNC_STATE, /* Paired with "Z" in pstate_map. */
+UNINTERRUPTIBLE_SLEEP_STATE /* Paired with "D" in pstate_map. */
+};
+
 struct process;
 
 /* Starting and monitoring subprocesses.
@@ -38,6 +47,7 @@ bool process_exited(struct process *);
 int process_status(const struct process *);
 void process_run(void);
 void process_wait(struct process *);
+int get_process_status(int, int *);
 
 /* These functions are thread-safe. */
 char *process_status_msg(int);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 03/21] dpdk: Add helper functions for DPDK datapath keepalive.

2017-06-07 Thread Bhanuprakash Bodireddy
Introduce helper functions in 'dpdk' module that are needed for
DPDK keepalive functionality. Also add dummy functions in 'dpdk-stub' module
that are needed when DPDK datapath is not available.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpdk-stub.c | 24 
 lib/dpdk.c  | 31 +++
 lib/dpdk.h  | 10 ++
 3 files changed, 65 insertions(+)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index daef729..d7fb19b 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -48,3 +48,27 @@ dpdk_get_vhost_sock_dir(void)
 {
 return NULL;
 }
+
+void
+dpdk_register_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing to do: dummy used when the DPDK datapath is not available. */
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing to do: dummy used when the DPDK datapath is not available. */
+}
+
+void
+dpdk_mark_pmd_core_alive(void)
+{
+/* Nothing to do: dummy used when the DPDK datapath is not available. */
+}
+
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+/* Nothing to do: dummy used when the DPDK datapath is not available. */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8da6c32..9c764b9 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #ifdef DPDK_PDUMP
@@ -489,3 +490,33 @@ dpdk_set_lcore_id(unsigned cpu)
 ovs_assert(cpu != NON_PMD_CORE_ID);
 RTE_PER_LCORE(_lcore_id) = cpu;
 }
+
+/* Registers packet processing core 'core' for liveness checks with
+ * 'rte_global_keepalive_info'. */
+void
+dpdk_register_pmd_core(unsigned core)
+{
+rte_keepalive_register_core(rte_global_keepalive_info, core);
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core OVS_UNUSED)
+{
+/* XXX: DPDK unfortunately hasn't implemented an unregister API.
+ * Until that is fixed, use the sleep API instead.  'core' is ignored:
+ * the sleep call below takes no core argument.
+ */
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
+
+/* Marks the packet processing core alive in 'rte_global_keepalive_info'. */
+void
+dpdk_mark_pmd_core_alive(void)
+{
+rte_keepalive_mark_alive(rte_global_keepalive_info);
+}
+
+/* Marks the packet processing core as idle in 'rte_global_keepalive_info'. */
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 673a1f1..bdbb51b 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -17,6 +17,7 @@
 #ifndef DPDK_H
 #define DPDK_H
 
+#include 
 #ifdef DPDK_NETDEV
 
 #include 
@@ -26,14 +27,23 @@
 
 #else
 
+#include 
+
 #define NON_PMD_CORE_ID UINT32_MAX
 
 #endif /* DPDK_NETDEV */
 
 struct smap;
 
+struct rte_keepalive *rte_global_keepalive_info; /* NOTE(review): this is a
+ * definition rather than an 'extern' declaration, so every file that includes
+ * dpdk.h gets its own tentative definition -- confirm that a single .c file
+ * should own the variable instead. */
 void dpdk_init(const struct smap *ovs_other_config);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
+/* Keepalive APIs */
+void dpdk_register_pmd_core(unsigned core_id);
+void dpdk_unregister_pmd_core(unsigned core_id);
+void dpdk_mark_pmd_core_alive(void);
+void dpdk_mark_pmd_core_sleep(void);
+
 #endif /* dpdk.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 21/21] Documentation: Update DPDK doc with Keepalive feature.

2017-06-07 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing cores(PMD thread cores) by dispatching heartbeats at regular
intervals. In case of heartbeat misses, additional health checks are
enabled on the PMD thread to detect the failure and the same shall be
reported to higher level fault management systems/frameworks.

The implementation uses OVSDB for reporting the datapath status and the
health of the PMD threads. Any external monitoring application can read
the status from OVSDB at regular intervals (or) subscribe to the updates
in OVSDB so that they get notified when the changes happen on OVSDB.

POSIX shared memory object is created and initialized for storing the
status of the PMD threads. This is initialized by main thread(vswitchd)
as part of init process and will be periodically updated by 'keepalive'
thread. keepalive feature can be enabled through below OVSDB settings.

enable-keepalive=true
  - Keepalive feature is disabled by default.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

keepalive-shm-name="/ovs_keepalive_shm_name"
  - Shared memory block name where the events shall be updated.

When KA is enabled, 'ovs-keepalive' thread shall be spawned that wakes
up at regular intervals to update the timestamp and status of pmd cores
in shared memory region. This information shall be read by vswitchd thread
and written in to 'keepalive' column of Open_vSwitch table in OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, the collectd shall be notified and will eventually relay the status
to ceilometer service running in the controller. Below is the high level
overview of deployment model.

Compute NodeControllerCompute Node

Collectd  <--> Ceilometer <>   Collectd

OvS DPDK   OvS DPDK

+-+
| VM  |
+--+--+
   \---+---/
   |
+--+---+   ++--+ +--+---+
| OVS  |-> |   ovsevents plugin| --> |   collectd   |
+--+---+   ++--+ +--+---+

+--+-+ +---++ |
| Ceilometer | <-- | collectd ceilometer plugin |  <---
+--+-+ +---++

Performance impact
--
No noticeable performance or latency impact is observed with
KA feature enabled. The tests were run with 100ms KA interval
and latency is (Min:134,710ns, Avg:173,005ns, Max:1,504,670ns)
for Phy2Phy loopback test case with 100 unique streams.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/howto/dpdk.rst | 95 
 1 file changed, 95 insertions(+)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index f3c7aff..018ff6a 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -407,6 +407,101 @@ For certain traffic profiles with many parallel flows, 
it's recommended to set
 
 For more information on the EMC refer to :doc:`/intro/install/dpdk` .
 
+.. _dpdk_keepalive:
+
+KeepAlive
+-
+
+OvS KeepAlive(KA) feature is disabled by default. To enable KA feature::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true
+
+The default timer interval for monitoring packet processing cores is 100ms.
+To set a different timer value, run::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . \
+other_config:keepalive-interval="5000"
+
+The events comprise core states and the last seen timestamps. The events
+are written in to shared memory region ``/dev/shm/dpdk_keepalive_shm_name``
+by keepalive thread periodically. To write in to a different shared memory
+region, run::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . \
+other_config:keepalive-shm-name="/"
+
+The events in the shared memory block are retrieved by main(vswitchd) thread
+and updated in to keepalive column of Open_vSwitch table in OVSDB. Any external
+monitoring application can read the status from OVSDB at intervals or subscribe
+to the updates so that they get notified when the changes happen on OVSDB.
+`collectd <https://collectd.org/>`__ has built-in support for DPDK and provides
+a `ovs_events` and `ovs_stats` plugin that can be enabled to relay the datapath
+status and the PMD status to OpenStack service `Ceilometer
+<https://docs.openstack.org/developer/ceilometer/>`__.
+
+To install and configure `collectd`, run::
+
+# Clone collectd from Git repository
+$ git clone https://github.com/collectd/collect

[ovs-dev] [RFC PATCH 02/21] ovsschema: Introduce 'keepalive' column in Open_vSwitch.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds new ovsdb column "keepalive". It shows the overall datapath
status and the health of the cores running datapath threads.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 vswitchd/vswitch.ovsschema |  7 +--
 vswitchd/vswitch.xml   | 20 
 2 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/vswitchd/vswitch.ovsschema b/vswitchd/vswitch.ovsschema
index 19b49da..769434e 100644
--- a/vswitchd/vswitch.ovsschema
+++ b/vswitchd/vswitch.ovsschema
@@ -1,6 +1,6 @@
 {"name": "Open_vSwitch",
- "version": "7.15.0",
- "cksum": "544856471 23228",
+ "version": "7.16.0",
+ "cksum": "2916438977 23364",
  "tables": {
"Open_vSwitch": {
  "columns": {
@@ -28,6 +28,9 @@
"statistics": {
  "type": {"key": "string", "value": "string", "min": 0, "max": 
"unlimited"},
  "ephemeral": true},
+   "keepalive": {
+ "type": {"key": "string", "value": "string", "min": 0, "max": 
"unlimited"},
+ "ephemeral": true},
"ovs_version": {
  "type": {"key": {"type": "string"},
   "min": 0, "max": 1}},
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 59c96df..fd4ba04 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -569,6 +569,26 @@
 the daemon.
   
 
+
+
+  
+One such key-value pair, with ID replaced by the
+core id, will exist for each active PMD thread.  The value is a
+comma-separated list of status of PMD core and last seen timestamp
+of PMD thread. In respective order, these values are:
+  
+
+  
+Status of PMD core.  Valid values include ALIVE, MISSING, DEAD,
+GONE, DOZING, SLEEP.
+Last seen timestamp of the PMD core.
+  
+
+  
+This is only valid for OvS-DPDK Datapath and only PMD threads 
status
+is implemented.
+  
+
   
 
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 08/21] dpif-netdev: Register packet processing cores to KA framework.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD cores to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD cores. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 65 +++
 1 file changed, 65 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2f224db..2607b9a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -48,6 +48,7 @@
 #include "fat-rwlock.h"
 #include "flow.h"
 #include "hmapx.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -969,6 +970,62 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+/* Entry point of the "ovs_keepalive" thread.  Detaches itself, then sleeps
+ * for one keepalive interval at a time, forever, staying in an RCU quiescent
+ * state while asleep.  'f_' is the 'struct dp_netdev' the thread was created
+ * for. */
+static void *
+ovs_keepalive(void *f_)
+{
+    struct dp_netdev *dp = f_;
+
+    pthread_detach(pthread_self());
+
+    while (true) {
+        /* The interval is in milliseconds, usleep() takes microseconds. */
+        uint32_t sleep_us = get_ka_interval() * 1000;
+
+        ovsrcu_quiesce_start();
+        usleep(sleep_us);
+        ovsrcu_quiesce_end();
+    }
+
+    return NULL;
+}
+
+/* Creates the "ovs_keepalive" thread, passing it 'dp'.  The 'once' guard
+ * makes sure the thread is created only on the first call. */
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+    static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+    if (ovsthread_once_start(&once)) {
+        ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+        ovsthread_once_done(&once);
+    }
+}
+
+/* Starts the keepalive thread and registers the PMD threads of 'dp' with the
+ * keepalive framework.  Does nothing beyond logging when keepalive
+ * initialization did not succeed.
+ *
+ * Threads without any entry in their poll list, and the thread whose core id
+ * is NON_PMD_CORE_ID, are not registered. */
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+    struct dp_netdev_pmd_thread *pmd;
+    int ka_init = get_ka_init_status();
+
+    VLOG_DBG("Keepalive: Was initialization successful? [%s]",
+             ka_init ? "Success" : "Failure");
+    if (!ka_init) {
+        return;
+    }
+
+    ka_thread_start(dp);
+
+    CMAP_FOR_EACH (pmd, node, &dp->poll_threads) {
+        /* Skip PMD thread with no rxqs mapping. */
+        if (!hmap_count(&pmd->poll_list)) {
+            continue;
+        }
+
+        /* Register only PMD threads. */
+        if (pmd->core_id != NON_PMD_CORE_ID) {
+            ka_register_pmd_thread(pmd->core_id);
+            VLOG_DBG("Registered PMD thread Core [%d] to KA framework",
+                     pmd->core_id);
+        }
+    }
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
  void *aux)
@@ -3539,6 +3596,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads for KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -3736,6 +3796,9 @@ reload:
poll_list[i].port_no);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive();
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -3766,6 +3829,8 @@ reload:
 goto reload;
 }
 
+ka_unregister_pmd_thread(pmd->core_id);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 09/21] dpif-netdev: Dispatch heartbeats for DPDK datapath.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals, as set in ovsdb
using 'keepalive-interval'.

The heartbeats are only enabled when there is at least one port added to
the bridge and with active PMD thread polling the port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpdk-stub.c   | 6 ++
 lib/dpdk.c| 7 +++
 lib/dpdk.h| 2 ++
 lib/dpif-netdev.c | 5 +
 4 files changed, 20 insertions(+)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index d7fb19b..3353ddd 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -72,3 +72,9 @@ dpdk_mark_pmd_core_sleep(void)
 {
 /* Nothing */
 }
+
+void
+dispatch_heartbeats(void)
+{
+/* Nothing to do: dummy used when the DPDK datapath is not available. */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 3f5669b..8a9132a 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -543,3 +543,10 @@ dpdk_mark_pmd_core_sleep(void)
 {
 rte_keepalive_mark_sleep(rte_global_keepalive_info);
 }
+
+/* Dispatches heartbeat pings through rte_keepalive_dispatch_pings(), using
+ * 'rte_global_keepalive_info'. */
+void
+dispatch_heartbeats(void)
+{
+rte_keepalive_dispatch_pings(NULL, rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index dc830c4..7b7f06f 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -47,4 +47,6 @@ void dpdk_unregister_pmd_core(unsigned core_id);
 void dpdk_mark_pmd_core_alive(void);
 void dpdk_mark_pmd_core_sleep(void);
 
+void dispatch_heartbeats(void);
+
 #endif /* dpdk.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2607b9a..97c3807 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -978,6 +978,11 @@ ovs_keepalive(void *f_)
 pthread_detach(pthread_self());
 
 for (;;) {
+int n_pmds = cmap_count(>poll_threads) - 1;
+if (n_pmds > 0) {
+dispatch_heartbeats();
+}
+
 ovsrcu_quiesce_start();
 usleep(get_ka_interval() * 1000);
 ovsrcu_quiesce_end();
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 15/21] dpif-netdev: Add additional datapath health checks.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit enables additional datapath health checks. The checks
are enabled only on a PMD heartbeat failure. On missing three successive
heartbeats, additional health checks need to be performed on each PMD
to confirm and flag the failure.

The datapath health is monitored periodically from keepalive thread.
It should be noted that the PMD health checks are only performed on
the PMD threads whose health check is enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 23 +++
 lib/keepalive.c   | 24 
 lib/keepalive.h   |  3 +++
 3 files changed, 50 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 6ac1bd3..b2f0611 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -970,6 +970,28 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+/* Nothing yet: 'pmd' is unused and no check is performed here. */
+}
+
+/* Runs pmd_health_check() on the PMD thread of every core of 'dp' whose
+ * health check is currently enabled in the keepalive module. */
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+    for (int core_id = 0; core_id < KEEPALIVE_MAXCORES; core_id++) {
+        struct dp_netdev_pmd_thread *pmd;
+
+        /* Check only PMD threads whose health check is enabled. */
+        if (OVS_LIKELY(!ka_is_pmdhealth_check_needed(core_id))) {
+            continue;
+        }
+
+        /* The lookup returns NULL when no PMD thread can be referenced for
+         * this core; there is nothing to check in that case. */
+        pmd = dp_netdev_get_pmd(dp, core_id);
+        if (!pmd) {
+            continue;
+        }
+
+        pmd_health_check(pmd);
+
+        /* dp_netdev_get_pmd() takes a reference on the thread it returns;
+         * release it once the check is done. */
+        dp_netdev_pmd_unref(pmd);
+    }
+}
+
 static void *
 ovs_keepalive(void *f_)
 {
@@ -981,6 +1003,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_datapath_health(dp);
 get_ka_stats();
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index da830ab..9fa9ad9 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -42,6 +42,8 @@ struct keepalive_shm *ka_shm = NULL;
 static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
 static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
 
+static bool ka_pmd_core_health[KEEPALIVE_MAXCORES] = {false};
+
 /* Return the Keepalive shared memory block name. */
 static inline const char *
 get_ka_shm_blk(void)
@@ -124,6 +126,28 @@ ka_mark_pmd_thread_sleep(void)
 }
 }
 
+/* Turns on the additional health checks for the PMD on core 'core_id'.
+ * Does nothing while keepalive is disabled or when 'core_id' is outside the
+ * 'ka_pmd_core_health' array. */
+void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+    if (is_ka_enabled() && core_id < KEEPALIVE_MAXCORES) {
+        ka_pmd_core_health[core_id] = true;
+    }
+}
+
+/* Turns off the additional health checks for the PMD on core 'core_id'.
+ * Does nothing while keepalive is disabled or when 'core_id' is outside the
+ * 'ka_pmd_core_health' array. */
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+    if (is_ka_enabled() && core_id < KEEPALIVE_MAXCORES) {
+        ka_pmd_core_health[core_id] = false;
+    }
+}
+
+/* Returns true if the additional health checks are turned on for the PMD on
+ * core 'core_id'.  Returns false when 'core_id' is outside the
+ * 'ka_pmd_core_health' array. */
+bool
+ka_is_pmdhealth_check_needed(unsigned core_id)
+{
+    return core_id < KEEPALIVE_MAXCORES && ka_pmd_core_health[core_id];
+}
+
 /* Retrieve and return the keepalive timer interval from OVSDB. */
 static uint32_t
 get_ka_timer_interval(const struct smap *ovs_other_config OVS_UNUSED)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 63f35f0..32ea729 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -60,6 +60,9 @@ void ka_register_pmd_thread(unsigned);
 void ka_unregister_pmd_thread(unsigned);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
+void ka_enable_pmd_health_check(unsigned);
+void ka_disable_pmd_health_check(unsigned);
+bool ka_is_pmdhealth_check_needed(unsigned);
 
 void ka_get_tid(unsigned core);
 bool is_ka_enabled(void);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 13/21] keepalive: Add support to query keepalive status.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds support to query if keepalive status is
enabled/disabled.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3048527..d74b4ab 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -338,6 +338,19 @@ ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int 
argc OVS_UNUSED,
 unixctl_command_reply(conn, ds_cstr());
 }
 
+/* unixctl handler registered as "keepalive/status": replies on 'conn' with
+ * whether keepalive is enabled or disabled.  The remaining arguments are
+ * unused. */
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+                  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+    struct ds ds = DS_EMPTY_INITIALIZER;
+
+    ds_put_format(&ds, "keepAlive Status: %s",
+                  is_ka_enabled() ? "Enabled" : "Disabled");
+
+    unixctl_command_reply(conn, ds_cstr(&ds));
+    ds_destroy(&ds);
+}
+
 static int
 ka_init__(void)
 {
@@ -379,6 +392,10 @@ ka_init(const struct smap *ovs_other_config)
 } else {
 VLOG_ERR("keepalive_shm_create() failed.");
 }
+
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH 18/21] keepalive: Check the PMD cycle stats as part of PMD health checks.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds the support to check the PMD cycle stats. If the cycles
aren't changing for a duration of time this can be flagged as possible
PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 17 ++---
 lib/dpif-netdev.h |  6 ++
 lib/keepalive.c   | 50 ++
 lib/keepalive.h   |  4 
 4 files changed, 70 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1d98c0b..3574b5c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -327,12 +327,6 @@ enum dp_stat_type {
 DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
-PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
-PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -977,6 +971,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 struct rxq_poll *poll;
 int port_link_status = 0;
 int port_stats = 0;
+int pmd_polling = 0;
+uint64_t cycles[PMD_N_CYCLES];
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -1011,11 +1007,18 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 }
 svec_destroy(_poll_list);
 
+/* Update the cycle counters in SHM. */
+for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+atomic_read_relaxed(>cycles.n[idx], [idx]);
+}
+
+pmd_polling = ka_shm_update_pmd_cycles(pmd->core_id, cycles);
+
 port_link_status = ka_get_polled_ports_status(pmd->core_id);
 port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
 if (port_link_status == ACTIVE_RUN_STATE &&
-port_stats == ACTIVE_RUN_STATE ) {
+port_stats == ACTIVE_RUN_STATE && pmd_polling == ACTIVE_RUN_STATE) {
 ka_set_pmd_state(pmd->core_id, KA_STATE_ALIVE);
 } else {
 ka_set_pmd_state(pmd->core_id, KA_STATE_CHECK);
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@ extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
+PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets. */
+PMD_N_CYCLES /* Number of counters above; used to size cycle arrays. */
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index f513921..3b00d01 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -497,6 +497,56 @@ ka_shm_update_port_statistics(const struct netdev *netdev,
 state;
 }
 
+/* Checks whether the PMD on core 'pmd_core_id' is still consuming cycles.
+ *
+ * 'cycles' holds the current counters of the PMD, indexed by
+ * 'enum pmd_cycles_counter_type'.  While the core is not in KA_STATE_CHECK,
+ * the counters are only saved in shared memory as the reference snapshot.
+ * Once the core is in KA_STATE_CHECK, they are compared with that snapshot
+ * and the snapshot is left alone.
+ *
+ * Returns ACTIVE_RUN_STATE if the snapshot was refreshed or at least one
+ * counter moved since the snapshot, FAILURE_STATE if neither counter moved,
+ * or -1 if the shared memory block is unavailable or all counters are
+ * zero. */
+int
+ka_shm_update_pmd_cycles(int pmd_core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+    /* Named 'shm' so that it does not shadow the file-level 'ka_shm'.
+     * NOTE(review): 'rl' is assumed to be this file's vlog rate limiter --
+     * confirm the name. */
+    struct keepalive_shm *shm = get_ka_shm();
+    if (!shm) {
+        VLOG_ERR_RL(&rl, "KeepAlive: Invalid shared memory block.");
+        return -1;
+    }
+
+    uint64_t total_cycles = 0;
+    for (int i = 0; i < PMD_N_CYCLES; i++) {
+        total_cycles += cycles[i];
+    }
+    if (!total_cycles) {
+        return -1;
+    }
+
+    int pmd_state = ka_get_pmd_state(pmd_core_id);
+    VLOG_DBG("%s_%d: PMD_STATE:%d", __func__, __LINE__, pmd_state);
+
+    if (pmd_state != KA_STATE_CHECK) {
+        /* Not under scrutiny: just refresh the reference snapshot. */
+        shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_POLLING] =
+            cycles[PMD_CYCLES_POLLING];
+        shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_PROCESSING] =
+            cycles[PMD_CYCLES_PROCESSING];
+        return ACTIVE_RUN_STATE;
+    }
+
+    VLOG_DBG_RL(&rl, "Keepalive: Going to check the PMD thresholds now.");
+
+    uint64_t polling_cycles_cnt = cycles[PMD_CYCLES_POLLING]
+        - shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_POLLING];
+    uint64_t proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING]
+        - shm->ext_stats[pmd_core_id].cycles[PMD_CYCLES_PROCESSING];
+
+    /* No progress on either counter since the snapshot: flag a failure. */
+    if (!polling_cycles_cnt && !proc_cycles_cnt) {
+        VLOG_DBG("PMD FAILURE");
+        return FAILURE_STATE;
+    }
+
+    return ACTIVE_RUN_STATE;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_shm_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index f9bdf12..d8e55d5 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -26,6 +26,7 @@
 #define KEEPALIVE_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 #define MAX_POLL_PORTS 20
@@ -61,6 +6

[ovs-dev] [RFC PATCH 19/21] netdev-dpdk: Enable PMD health checks on heartbeat failure.

2017-06-07 Thread Bhanuprakash Bodireddy
The keepalive thread sends heartbeats to PMD thread and when PMD fails to
respond to successive heartbeats the PMD is potentially stalled. The PMD
state transition is as below:

ALIVE -> MISSING -> DEAD -> GONE

This commit enables PMD healthchecks when PMD doesn't respond to
heartbeats. This is needed to handle false negatives. With this commit
the new state transition is as below:

ALIVE -> MISSING -> DEAD -> CHECK -> GONE

A PMD health-checking state is introduced; it kicks in immediately when
the PMD gets into the DEAD state. The following are considered as part of it:

  - Link status of the ports polled by PMD thread.
  - Statistics of the ports polled by PMD thread.
  - PMD polling and processing cycles.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.h   |  3 +++
 lib/netdev-dpdk.c | 39 +--
 2 files changed, 40 insertions(+), 2 deletions(-)

diff --git a/lib/keepalive.h b/lib/keepalive.h
index d8e55d5..6d1733c 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -71,6 +71,9 @@ struct keepalive_shm {
 /* Last seen timestamp of the core */
 uint64_t core_last_seen_times[KEEPALIVE_MAXCORES];
 
+/* Number of PMD failures */
+uint32_t core_failures[KEEPALIVE_MAXCORES];
+
 /* Store pmd thread tid */
 pid_t thread_id[KEEPALIVE_MAXCORES];
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 74c1ab1..bb93fd8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -606,6 +606,32 @@ dpdk_failcore_cb(void *ptr_data, const int core_id)
 }
 }
 
+static void
+dpdk_ka_handle_failure(struct keepalive_shm *ka_shm, const int core_id,
+   const enum rte_keepalive_state core_state, uint64_t last_alive)
+{
+if ((ka_shm->core_state[core_id] != KA_STATE_CHECK) &&
+(ka_shm->core_state[core_id] == KA_STATE_DEAD)) {
+ka_set_pmd_state(core_id, KA_STATE_CHECK);
+} else {
+/* The core failure has to be incremented only once when the
+ * state transition happens from CHECK -> GONE.
+ */
+if (ka_shm->core_state[core_id] == KA_STATE_CHECK) {
+ka_shm->core_failures[core_id]++;
+}
+
+/* Set the PMD core state to KA_STATE_GONE i.e failure. */
+ka_set_pmd_state(core_id, core_state);
+
+if (ka_is_pmdhealth_check_needed(core_id)) {
+ka_disable_pmd_health_check(core_id);
+}
+}
+
+ka_shm->core_last_seen_times[core_id] = last_alive;
+}
+
 /* Update the core state in shared memory.
  *
  * This function shall be invoked periodically to write the core status and
@@ -632,10 +658,19 @@ dpdk_ka_update_core_state(void *ptr_data, const int 
core_id,
 ka_shm->core_state[core_id] = KA_STATE_ALIVE;
 ka_shm->core_last_seen_times[core_id] = last_alive;
 break;
-case RTE_KA_STATE_DOZING:
-case RTE_KA_STATE_SLEEP:
 case RTE_KA_STATE_DEAD:
+/* Enable PMD health check here, as we are in penultimate state
+ * of declaring PMD as failed. */
+ka_enable_pmd_health_check(core_id);
+
+ka_shm->core_state[core_id] = core_state;
+ka_shm->core_last_seen_times[core_id] = last_alive;
+break;
 case RTE_KA_STATE_GONE:
+dpdk_ka_handle_failure(ka_shm, core_id, core_state, last_alive);
+break;
+case RTE_KA_STATE_DOZING:
+case RTE_KA_STATE_SLEEP:
 ka_shm->core_state[core_id] = core_state;
 ka_shm->core_last_seen_times[core_id] = last_alive;
 break;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/8] netdev-dpdk: Add netdev_dpdk_txq_drain function.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds the netdev_dpdk_txq_drain() function. If there are
any packets waiting in the queue, they are transmitted instantly
using the rte_eth_tx_burst function. When XPS is enabled, a lock is
taken on the tx queue before the queue is drained.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
---
 lib/netdev-dpdk.c | 30 --
 1 file changed, 28 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index d9c8a5e..13b4487 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -292,6 +292,11 @@ struct dpdk_mp {
 struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
+/* Queue 'INTERIM_QUEUE_BURST_THRESHOLD' packets before tranmitting.
+ * Defaults to 'NETDEV_MAX_BURST'(32) packets.
+ */
+#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST
+
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
@@ -301,6 +306,12 @@ struct dpdk_tx_queue {
 * pmd threads (see 'concurrent_txq'). */
 int map;   /* Mapping of configured vhost-user queues
 * to enabled by guest. */
+int count; /* Number of buffered packets waiting to
+  be sent. */
+struct rte_mbuf *burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered to amortize the cost of MMIO
+* writes. */
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1892,9 +1903,24 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
  * few packets (< INTERIM_QUEUE_BURST_THRESHOLD) are buffered in the queue.
  */
 static int
-netdev_dpdk_txq_drain(struct netdev *netdev OVS_UNUSED,
-  int qid OVS_UNUSED, bool concurrent_txq OVS_UNUSED)
+netdev_dpdk_txq_drain(struct netdev *netdev,
+  int qid, bool concurrent_txq)
 {
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->count)) {
+if (OVS_UNLIKELY(concurrent_txq)) {
+qid = qid % dev->up.n_txq;
+rte_spinlock_lock(>tx_q[qid].tx_lock);
+}
+
+netdev_dpdk_eth_tx_burst(dev, qid, txq->burst_pkts, txq->count);
+
+if (OVS_UNLIKELY(concurrent_txq)) {
+rte_spinlock_unlock(>tx_q[qid].tx_lock);
+}
+}
 return 0;
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/8] dpif-netdev: Drain the packets in intermediate queue.

2017-06-07 Thread Bhanuprakash Bodireddy
Under low rate traffic conditions, there can be 2 issues.
  (1) Packets potentially can get stuck in the intermediate queue.
  (2) Latency of the packets can increase significantly due to
   buffering in intermediate queue.

This commit handles issue (1) by draining the tx port queues from the
PMD processing loop. It also addresses issue (2) by draining the tx
queues after every rxq port is processed. This reduces the latency
without impacting the forwarding throughput.

   MASTER
   ------
   Pkt size  min(ns)   avg(ns)   max(ns)
    512      4,631     5,022     309,914
   1024      5,545     5,749     104,294
   1280      5,978     6,159      45,306
   1518      6,419     6,774     946,850

   MASTER + COMMIT
   ---------------
   Pkt size  min(ns)   avg(ns)   max(ns)
    512      4,711     5,064     182,477
   1024      5,601     5,888     701,654
   1280      6,018     6,491     533,037
   1518      6,467     6,734     312,471

PMDs can be torn down and spawned at runtime, so the rxq and txq
mapping of the PMD threads can change. In a few cases packets can get
stuck in the queue due to reconfiguration, and this commit helps drain
the queues.

Suggested-by: Eelco Chaudron <echau...@redhat.com>
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
---
 lib/dpif-netdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e1c43fe..12f8477 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3760,6 +3760,8 @@ reload:
 for (i = 0; i < poll_cnt; i++) {
 dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
poll_list[i].port_no);
+
+dp_netdev_drain_txq_ports(pmd);
 }
 
 if (lc++ > 1024) {
@@ -3780,6 +3782,9 @@ reload:
 }
 }
 
+/* Drain the queues as part of reconfiguration logic. */
+dp_netdev_drain_txq_ports(pmd);
+
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
 exiting = latch_is_set(>exit_latch);
 /* Signal here to make sure the pmd finishes
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/8] netdev-dpdk: Enable intermediate queue for vHost User port.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit refactors __netdev_dpdk_vhost_send() and enables the
intermediate queue, wherein packets are buffered until the threshold
'INTERIM_QUEUE_BURST_THRESHOLD' (32) is hit and are then transmitted.

This commit improves the throughput as reported below in simple Physical
to virtual testcase with higher flows @10G Line rate.

  Num Flow   Master    Commit
  ========   =======   =======
  10         5945899   7833914
  32         3872211   6530133
  50         3283713   6618711
  100        3132540   5857226
  500        2964499   5273006
  1000       2931952   5178038

Latency stats:

  MASTER
  ------
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,011   12,100   281,915
  1024       7,870    9,313   193,116
  1280       7,862    9,036   194,439
  1518       8,215    9,417   204,782

  MASTER + COMMIT
  ---------------
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,492   13,655   281,538
  1024       8,407    9,784   205,095
  1280       8,399    9,750   194,888
  1518       8,367    9,722   196,973

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html
 [By Eelco Chaudron <echau...@redhat.com>]
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
---
 lib/netdev-dpdk.c | 38 +++---
 1 file changed, 15 insertions(+), 23 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index dfaa3cd..765718e 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1784,16 +1784,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
 unsigned int total_pkts = cnt;
 unsigned int dropped = 0;
-int i, retries = 0;
+int i;
 
 qid = dev->tx_q[qid % netdev->n_txq].map;
+struct dpdk_tx_queue *txq = >tx_q[qid];
 
 if (OVS_UNLIKELY(!is_vhost_running(dev) || qid < 0
  || !(dev->flags & NETDEV_UP))) {
 rte_spinlock_lock(>stats_lock);
 dev->stats.tx_dropped+= cnt;
 rte_spinlock_unlock(>stats_lock);
-goto out;
+
+for (i = 0; i < total_pkts; i++) {
+dp_packet_delete(pkts[i]);
+}
+return;
 }
 
 rte_spinlock_lock(>tx_q[qid].tx_lock);
@@ -1803,34 +1808,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
 dropped = total_pkts - cnt;
 
-do {
-int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-unsigned int tx_pkts;
-
-tx_pkts = rte_vhost_enqueue_burst(netdev_dpdk_get_vid(dev),
-  vhost_qid, cur_pkts, cnt);
-if (OVS_LIKELY(tx_pkts)) {
-/* Packets have been sent.*/
-cnt -= tx_pkts;
-/* Prepare for possible retry.*/
-cur_pkts = _pkts[tx_pkts];
-} else {
-/* No packets sent - do not retry.*/
-break;
+int idx = 0;
+while (idx < cnt) {
+txq->pkts[txq->vhost_pkt_cnt++] = pkts[idx++];
+
+if (txq->vhost_pkt_cnt >= INTERIM_QUEUE_BURST_THRESHOLD) {
+dropped += netdev_dpdk_vhost_tx_burst(dev, qid);
 }
-} while (cnt && (retries++ <= VHOST_ENQ_RETRY_NUM));
+}
 
 rte_spinlock_unlock(>tx_q[qid].tx_lock);
 
 rte_spinlock_lock(>stats_lock);
 netdev_dpdk_vhost_update_tx_counters(>stats, pkts, total_pkts,
- cnt + dropped);
+ dropped);
 rte_spinlock_unlock(>stats_lock);
-
-out:
-for (i = 0; i < total_pkts - dropped; i++) {
-dp_packet_delete(pkts[i]);
-}
 }
 
 /* Tx function. Transmit packets indefinitely */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/8] netdev-dpdk: Add netdev_dpdk_vhost_txq_drain function.

2017-06-07 Thread Bhanuprakash Bodireddy
Add netdev_dpdk_vhost_txq_drain(), that flushes packets on vHost User
port queues. Also add netdev_dpdk_vhost_tx_burst() function that
uses rte_vhost_enqueue_burst() to enqueue burst of packets on vHost User
ports.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
---
 lib/netdev-dpdk.c | 67 +--
 1 file changed, 65 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4a9d9aa..dfaa3cd 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -308,10 +308,15 @@ struct dpdk_tx_queue {
 * to enabled by guest. */
 int count; /* Number of buffered packets waiting to
   be sent. */
+int vhost_pkt_cnt; /* Number of buffered packets waiting to
+  be sent on vhost port */
 struct rte_mbuf *burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
/* Intermediate queue where packets can
 * be buffered to amortize the cost of MMIO
 * writes. */
+struct dp_packet *pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered for vhost ports */
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1714,6 +1719,63 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_stats 
*stats,
 }
 }
 
+static int
+netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int qid)
+{
+struct dpdk_tx_queue *txq = >tx_q[qid];
+struct rte_mbuf **cur_pkts = (struct rte_mbuf **)txq->pkts;
+
+int tx_vid = netdev_dpdk_get_vid(dev);
+int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+uint32_t sent = 0;
+uint32_t retries = 0;
+uint32_t sum, total_pkts;
+
+total_pkts = sum = txq->vhost_pkt_cnt;
+do {
+uint32_t ret;
+ret = rte_vhost_enqueue_burst(tx_vid, tx_qid, _pkts[sent], sum);
+if (OVS_UNLIKELY(!ret)) {
+/* No packets enqueued - do not retry. */
+break;
+} else {
+/* Packet have been sent */
+sent += ret;
+
+/* 'sum; packet have to be retransmitted */
+sum -= ret;
+}
+} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
+
+for (int i=0; i < total_pkts; i++) {
+dp_packet_delete(txq->pkts[i]);
+}
+
+/* Reset pkt count */
+txq->vhost_pkt_cnt = 0;
+
+/* 'sum' refers to packets dropped */
+return sum;
+}
+
+/* Drain the txq if there are any packets available.
+ * dynamic_txqs/concurrent_txq is disabled for vHost User ports as
+ * 'OVS_VHOST_MAX_QUEUE_NUM[1024]' txqs are preallocated.
+ */
+static int
+netdev_dpdk_vhost_txq_drain(struct netdev *netdev, int qid,
+bool concurrent_txq OVS_UNUSED)
+{
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->vhost_pkt_cnt)) {
+netdev_dpdk_vhost_tx_burst(dev, qid);
+}
+
+return 0;
+}
+
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
  struct dp_packet **pkts, int cnt)
@@ -3425,7 +3487,8 @@ static const struct netdev_class dpdk_vhost_class =
 NULL,
 netdev_dpdk_vhost_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_drain);
+
 static const struct netdev_class dpdk_vhost_client_class =
 NETDEV_DPDK_CLASS(
 "dpdkvhostuserclient",
@@ -3441,7 +3504,7 @@ static const struct netdev_class dpdk_vhost_client_class =
 NULL,
 netdev_dpdk_vhost_client_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_drain);
 
 void
 netdev_dpdk_register(void)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 7/8] netdev-dpdk: Configurable retries while enqueuing to vHost User ports.

2017-06-07 Thread Bhanuprakash Bodireddy
This commit adds "vhost-enque-retry", with which the number of retries
performed while enqueuing packets to vHost User ports can be configured
in ovsdb.

Currently the number of retries is set to '8', and a retry is performed
when at least some packets were successfully sent on the previous attempt.
While this approach works well, it causes a throughput drop when multiple
vHost User ports are serviced by the same PMD thread.

This commit disables the retry mechanism by default; if retry logic is
needed, the number of retries can be set in ovsdb. For example, if a
maximum of 3 retries has to be performed, with at least some packets
successfully enqueued on the previous attempt, set the following:

  $ ovs-vsctl set Open_vSwitch . other_config:vhost-enque-retry=3

CC: Kevin Traynor <ktray...@redhat.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
---
 lib/dpdk.c   | 10 ++
 lib/dpdk.h   |  1 +
 lib/netdev-dpdk.c|  4 ++--
 vswitchd/vswitch.xml | 12 
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8da6c32..77c8274 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -41,6 +41,7 @@ VLOG_DEFINE_THIS_MODULE(dpdk);
 static FILE *log_stream = NULL;   /* Stream for DPDK log redirection */
 
 static char *vhost_sock_dir = NULL;   /* Location of vhost-user sockets */
+static int vhost_enq_retries_num = 0;
 
 static int
 process_vhost_flags(char *flag, const char *default_val, int size,
@@ -345,6 +346,9 @@ dpdk_init__(const struct smap *ovs_other_config)
 vhost_sock_dir = sock_dir_subcomponent;
 }
 
+vhost_enq_retries_num = smap_get_int(ovs_other_config,
+  "vhost-enque-retry", 0);
+
 argv = grow_argv(, 0, 1);
 argc = 1;
 argv[0] = xstrdup(ovs_get_program_name());
@@ -489,3 +493,9 @@ dpdk_set_lcore_id(unsigned cpu)
 ovs_assert(cpu != NON_PMD_CORE_ID);
 RTE_PER_LCORE(_lcore_id) = cpu;
 }
+
+int
+dpdk_get_vhost_retries(void)
+{
+return vhost_enq_retries_num;
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 673a1f1..9bbd49c 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -35,5 +35,6 @@ struct smap;
 void dpdk_init(const struct smap *ovs_other_config);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
+int dpdk_get_vhost_retries(void);
 
 #endif /* dpdk.h */
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 765718e..a092412 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -146,7 +146,6 @@ BUILD_ASSERT_DECL((MAX_NB_MBUF / 
ROUND_DOWN_POW2(MAX_NB_MBUF/MIN_NB_MBUF))
 /* DPDK library uses uint8_t for port_id. */
 typedef uint8_t dpdk_port_t;
 
-#define VHOST_ENQ_RETRY_NUM 8
 #define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ)
 
 static const struct rte_eth_conf port_conf = {
@@ -1727,6 +1726,7 @@ netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int 
qid)
 
 int tx_vid = netdev_dpdk_get_vid(dev);
 int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+int vhost_retries = dpdk_get_vhost_retries();
 uint32_t sent = 0;
 uint32_t retries = 0;
 uint32_t sum, total_pkts;
@@ -1745,7 +1745,7 @@ netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int 
qid)
 /* 'sum; packet have to be retransmitted */
 sum -= ret;
 }
-} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
+} while (sum && (retries++ < vhost_retries));
 
 for (int i=0; i < total_pkts; i++) {
 dp_packet_delete(txq->pkts[i]);
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 892f839..f19fa03 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -307,6 +307,18 @@
 
   
 
+  
+
+  Specifies the number of retries performed while enqueuing packets
+  on to the vhost user ports. If this value is unset, no retries by
+  default is performed on the enqueue side.
+
+
+  Changing this value requires restarting the daemon.
+
+  
+
   
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 08/18] keepalive: Retrieve PMD status periodically.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: PMDID="STATUS,core id,last_seen_timestamp"
  eg: PMD62="ALIVE,2,9220698256784207"
  PMD63="GONE,3,9220698256786231"

The status is periodically retrieved by the keepalive thread and stored in
the keepalive_stats struct, from which it is later retrieved by the vswitchd
thread. In the case of four PMD threads the status is as below:

   "PMD62"="ALIVE,0,9220698256784207"
   "PMD63"="ALIVE,1,9220698256784913"
   "PMD64"="ALIVE,2,9220698256785902"
   "PMD65"="ALIVE,3,9220698256786231"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 73 +++
 lib/keepalive.h   |  1 +
 3 files changed, 75 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index cf4d68c..93bda20 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -982,6 +982,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 ovsrcu_quiesce_start();
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 353f1d1..b437bef 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -25,6 +25,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
@@ -34,6 +35,9 @@ static bool ka_init_status = ka_init_failure; /* Keepalive 
initialization */
 static uint32_t keepalive_timer_interval; /* keepalive timer interval */
 static struct keepalive_info *ka_info = NULL;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 inline bool
 ka_is_enabled(void)
 {
@@ -257,6 +261,75 @@ keepalive_info_create(void)
 return ka_info;
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info->proclist_mutex)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return;
+}
+
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+int core_id = pinfo->core_id;
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+   pinfo->core_state == KA_STATE_SLEEP ) {
+continue;
+}
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, core_id, pinfo->core_last_seen_times);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info->proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index cfe02e5..bdec34f 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -101,6 +101,7 @@ int get_ka_init_status(void);
 int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
+void get_ka_stats(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 06/18] dpif-netdev: Register packet processing cores to KA framework.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD cores to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD cores. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 100 +
 lib/keepalive.c   | 130 +++---
 lib/keepalive.h   |  25 ++-
 3 files changed, 236 insertions(+), 19 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ce141e8..4b7c835 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -72,6 +72,7 @@
 #include "seq.h"
 #include "smap.h"
 #include "sset.h"
+#include "svec.h"
 #include "timeval.h"
 #include "tnl-neigh-cache.h"
 #include "tnl-ports.h"
@@ -970,6 +971,96 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+ovsrcu_quiesce_start();
+usleep(get_ka_interval() * 1000);
+ovsrcu_quiesce_end();
+}
+
+return NULL;
+}
+
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+static void
+pmd_num_poll_ports(struct dp_netdev_pmd_thread *pmd, int *num_poll_ports)
+{
+struct svec pmd_port_poll_list;
+svec_init(_port_poll_list);
+
+struct rxq_poll *poll;
+const char *port_name;
+int i = 0;
+
+HMAP_FOR_EACH (poll, node, >poll_list) {
+svec_add(_port_poll_list, netdev_rxq_get_name(poll->rxq->rx));
+}
+/* With MQ enabled, remove the duplicates. */
+svec_sort_unique(_port_poll_list);
+SVEC_FOR_EACH (i, port_name, _port_poll_list) {
+VLOG_DBG("%d Port:%s", i, port_name);
+}
+svec_destroy(_port_poll_list);
+
+*num_poll_ports = i;
+VLOG_DBG("PMD thread [%d] polling [%d] ports",
+ pmd->core_id, *num_poll_ports);
+}
+
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+int ka_init = get_ka_init_status();
+VLOG_DBG("Keepalive: Was initialization successful? [%s]",
+ka_init ? "Success" : "Failure");
+if (!ka_init) {
+return;
+}
+
+ka_thread_start(dp);
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+continue;
+}
+
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+int err;
+int nports;
+pmd_num_poll_ports(pmd, );
+err = ka_alloc_portstats(pmd->core_id, nports);
+if (err) {
+VLOG_FATAL("Unable to allocate memory for PMD core %d",
+pmd->core_id);
+return;
+}
+
+int tid = ka_get_pmd_tid(pmd->core_id);
+ka_register_thread(tid, true);
+VLOG_DBG("Registered PMD thread [%d] on Core [%d] to KA framework",
+  tid, pmd->core_id);
+}
+}
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
  void *aux)
@@ -3541,6 +3632,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -3740,6 +3834,9 @@ reload:
poll_list[i].port_no);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive();
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -3770,6 +3867,9 @@ reload:
 goto reload;
 }
 
+int tid = ka_get_pmd_tid(pmd->core_id);
+ka_unregister_thread(tid, true);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 54faf49..64ab117 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -25,6 +25,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "process.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -76,21 +77,77 @@ ka_store_pmd_id

[ovs-dev] [RFC PATCH v3 07/18] dpif-netdev: Enable heartbeats for DPDK datapath.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals (as set in ovsdb
with 'keepalive-interval').

The heartbeats are only enabled when there is at least one port added to
the bridge and an active PMD thread is polling the port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpdk-stub.c   | 6 ++
 lib/dpdk.c| 7 +++
 lib/dpdk.h| 2 ++
 lib/dpif-netdev.c | 9 -
 lib/keepalive.c   | 9 +
 lib/keepalive.h   | 1 +
 6 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index d7fb19b..bf7b891 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -72,3 +72,9 @@ dpdk_mark_pmd_core_sleep(void)
 {
 /* Nothing */
 }
+
+void
+dpdk_dispatch_pmd_hb(void)
+{
+/* Nothing */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 917ef58..231d045 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -537,3 +537,10 @@ dpdk_mark_pmd_core_sleep(void)
 {
 rte_keepalive_mark_sleep(rte_global_keepalive_info);
 }
+
+/* Dispatch pings */
+void
+dpdk_dispatch_pmd_hb(void)
+{
+rte_keepalive_dispatch_pings(NULL, rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 177624d..9fb438d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -48,4 +48,6 @@ void dpdk_unregister_pmd_core(unsigned core_id);
 void dpdk_mark_pmd_core_alive(void);
 void dpdk_mark_pmd_core_sleep(void);
 
+void dpdk_dispatch_pmd_hb(void);
+
 #endif /* dpdk.h */
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4b7c835..cf4d68c 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -972,11 +972,18 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void *
-ovs_keepalive(void *f_ OVS_UNUSED)
+ovs_keepalive(void *f_)
 {
+struct dp_netdev *dp = f_;
+
 pthread_detach(pthread_self());
 
 for (;;) {
+int n_pmds = cmap_count(>poll_threads) - 1;
+if (n_pmds > 0) {
+dispatch_heartbeats();
+}
+
 ovsrcu_quiesce_start();
 usleep(get_ka_interval() * 1000);
 ovsrcu_quiesce_end();
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 64ab117..353f1d1 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -239,6 +239,15 @@ ka_destroy_portstats(void)
 }
 }
 
+/* Dispatch pings */
+void
+dispatch_heartbeats(void)
+{
+#ifdef DPDK_NETDEV
+dpdk_dispatch_pmd_hb();
+#endif
+}
+
 static struct keepalive_info *
 keepalive_info_create(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index f1e232d..cfe02e5 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,4 +102,5 @@ int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 
+void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 09/18] bridge: Update keepalive status in OVSDB

2017-06-18 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

row                                  action keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old    { "PMD62"="ALIVE,0,9226457935188922"
                                              "PMD63"="ALIVE,1,9226457935189628"
                                              "PMD64"="ALIVE,2,9226457935189897"
                                              "PMD65"="ALIVE,3,9226457935190127"}

                                     new    { "PMD62"="ALIVE,0,9226460230167364"
                                              "PMD63"="ALIVE,1,9226460230168100"
                                              "PMD64"="ALIVE,2,9226460230168905"
                                              "PMD65"="ALIVE,3,9226460230169632"}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index b437bef..f0b75f0 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -330,6 +330,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 static int
 ka_init__(void)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index bdec34f..356e761 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ int ka_get_pmd_tid(unsigned core);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index 3927b9f..4b6b528 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2690,6 +2692,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3039,6 +3064,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 10/18] keepalive: Add support to query keepalive statistics.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive statistics. Datapath health
status can be retrieved as follows:

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status  : Enabled
keepalive interval: 1000 ms
PMD threads   : 8

    PMD     CORE    STATE   LAST SEEN TIMESTAMP
    pmd62    0      ALIVE   8632183482028293
    pmd63    1      ALIVE   8632183482028425
    pmd64    2      ALIVE   8632190191004294
    pmd65    3      ALIVE   8632183482028525
    pmd66    4      GONE    8612183482028117
    pmd67    5      ALIVE   8632190191004984
    pmd68    6      ALIVE   8632190191005713
    pmd69    7      ALIVE   8632190191006555

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 78 +
 1 file changed, 78 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index f0b75f0..a4b8d01 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,9 +24,11 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -345,6 +347,79 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads   : %"PRIu32" \n", ka_info->pmd_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+  pinfo->core_state == KA_STATE_SLEEP)
+continue;
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+ds_put_format(, "%s\t%2d\t%s\t%"PRIu64"\n",
+  pinfo->name, pinfo->core_id, state,
+  pinfo->core_last_seen_times);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -389,6 +464,9 @@ ka_init(const struct smap *ovs_other_config)
 VLOG_INFO("OvS Keepalive disabled.");
 }
 
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, ka_info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/6] netdev-dpdk: Add intermediate queue support.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces the netdev_dpdk_eth_tx_queue() function, which
implements an intermediate queue and packet buffering. Packets are
buffered until the threshold 'INTERIM_QUEUE_BURST_THRESHOLD' (32) is
reached and are then transmitted.

To handle the case (e.g. ping) where packets are sent at a low rate and
can potentially get stuck in the queue, flush logic is implemented
that gets invoked from dp_netdev_flush_txq_ports() as part of the PMD
packet processing loop.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/dpif-netdev.c | 44 +++-
 lib/netdev-dpdk.c | 35 ++-
 2 files changed, 77 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc7..d59208e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -332,6 +332,7 @@ enum pmd_cycles_counter_type {
 };
 
 #define XPS_TIMEOUT_MS 500LL
+#define LAST_USED_QID_NONE -1
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
 struct dp_netdev_rxq {
@@ -492,7 +493,13 @@ struct rxq_poll {
 struct tx_port {
 struct dp_netdev_port *port;
 int qid;
-long long last_used;
+int last_used_qid;/* Last queue id where packets got
+ enqueued. */
+long long last_used;  /* In case XPS is enabled, it contains the
+   * timestamp of the last time the port was
+   * used by the thread to send data.  After
+   * XPS_TIMEOUT_MS elapses the qid will be
+   * marked as -1. */
 struct hmap_node node;
 };
 
@@ -3081,6 +3088,25 @@ cycles_count_end(struct dp_netdev_pmd_thread *pmd,
 }
 
 static void
+dp_netdev_flush_txq_ports(struct dp_netdev_pmd_thread *pmd)
+{
+struct tx_port *cached_tx_port;
+int tx_qid;
+
+HMAP_FOR_EACH (cached_tx_port, node, >send_port_cache) {
+tx_qid = cached_tx_port->last_used_qid;
+
+if (tx_qid != LAST_USED_QID_NONE) {
+netdev_txq_flush(cached_tx_port->port->netdev, tx_qid,
+ cached_tx_port->port->dynamic_txqs);
+
+/* Queue flushed and mark it empty. */
+cached_tx_port->last_used_qid = LAST_USED_QID_NONE;
+}
+}
+}
+
+static void
 dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread *pmd,
struct netdev_rxq *rx,
odp_port_t port_no)
@@ -4356,6 +4382,7 @@ dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
 
 tx->port = port;
 tx->qid = -1;
+tx->last_used_qid = LAST_USED_QID_NONE;
 
 hmap_insert(>tx_ports, >node, hash_port_no(tx->port->port_no));
 pmd->need_reload = true;
@@ -4926,6 +4953,14 @@ dpif_netdev_xps_get_tx_qid(const struct 
dp_netdev_pmd_thread *pmd,
 
 dpif_netdev_xps_revalidate_pmd(pmd, now, false);
 
+/* The tx queue can change in XPS case, make sure packets in previous
+ * queue is flushed properly. */
+if (tx->last_used_qid != LAST_USED_QID_NONE &&
+   tx->qid != tx->last_used_qid) {
+netdev_txq_flush(port->netdev, tx->last_used_qid, port->dynamic_txqs);
+tx->last_used_qid = LAST_USED_QID_NONE;
+}
+
 VLOG_DBG("Core %d: New TX queue ID %d for port \'%s\'.",
  pmd->core_id, tx->qid, netdev_get_name(tx->port->netdev));
 return min_qid;
@@ -5021,6 +5056,13 @@ dp_execute_cb(void *aux_, struct dp_packet_batch 
*packets_,
 tx_qid = pmd->static_tx_qid;
 }
 
+/* In case these packets gets buffered into an intermediate
+ * queue and XPS is enabled the flush function could find a
+ * different tx qid assigned to its thread.  We keep track
+ * of the qid we're now using, that will trigger the flush
+ * function and will select the right queue to flush. */
+p->last_used_qid = tx_qid;
+
 netdev_send(p->port->netdev, tx_qid, packets_, may_steal,
 dynamic_txqs);
 return;
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1e83116..50a9a2c 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1434,6 +1434,7 @@ static inline int
 netdev_dpdk_eth_tx_burst(struct netdev_dpdk *dev, int qid,
  struct rte_mbuf **pkts, int cnt)
 {
+struct dpdk_tx_queue *txq = >tx_q[qid];
 uint32_t nb_tx = 0;
 
 while (nb_tx != cnt) {
@@ -1457,6 +1458,7 @@ netdev_dp

[ovs-dev] [PATCH 4/6] dpif-netdev: Flush the packets in intermediate queue.

2017-06-18 Thread Bhanuprakash Bodireddy
Under low-rate traffic conditions, two issues can arise:
  (1) Packets can potentially get stuck in the intermediate queue.
  (2) Packet latency can increase significantly due to buffering in
      the intermediate queue.

This commit addresses issue (1) by flushing the tx port queues from the
PMD processing loop. It also addresses issue (2) by flushing the tx
queues after every rxq port is processed. This reduces the latency
without impacting the forwarding throughput.

                 MASTER
                 ------
   Pkt size  min(ns)   avg(ns)   max(ns)
    512       4,631     5,022    309,914
   1024       5,545     5,749    104,294
   1280       5,978     6,159     45,306
   1518       6,419     6,774    946,850

             MASTER + COMMIT
             ---------------
   Pkt size  min(ns)   avg(ns)   max(ns)
    512       4,711     5,064    182,477
   1024       5,601     5,888    701,654
   1280       6,018     6,491    533,037
   1518       6,467     6,734    312,471

PMDs can be torn down and spawned at runtime, so the rxq and txq
mapping of the PMD threads can change. In a few cases packets can get
stuck in the queue due to reconfiguration, and this commit helps flush
the queues.

Suggested-by: Eelco Chaudron <echau...@redhat.com>
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/dpif-netdev.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d59208e..dfd88aa 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3761,6 +3761,8 @@ reload:
 for (i = 0; i < poll_cnt; i++) {
 dp_netdev_process_rxq_port(pmd, poll_list[i].rx,
poll_list[i].port_no);
+
+dp_netdev_flush_txq_ports(pmd);
 }
 
 if (lc++ > 1024) {
@@ -3781,6 +3783,9 @@ reload:
 }
 }
 
+/* Flush the queues as part of reconfiguration logic. */
+dp_netdev_flush_txq_ports(pmd);
+
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
 exiting = latch_is_set(>exit_latch);
 /* Signal here to make sure the pmd finishes
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 03/18] Keepalive: Add initial keepalive support.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces the initial keepalive support by adding the
'keepalive' module along with helper and initialization functions
that will be invoked by later commits.

This commit adds a new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented for the DPDK datapath, and
only the status of PMD threads is reported.

For example:
  To enable the keepalive feature:
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set a timer interval of 5000 ms for monitoring the packet processing
  cores:
  'ovs-vsctl --no-wait set Open_vSwitch . \
     other_config:keepalive-interval="5000"'

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/automake.mk|   2 +
 lib/dpdk.c |  17 +
 lib/dpdk.h |   2 +
 lib/keepalive.c| 160 +
 lib/keepalive.h|  74 +
 lib/netdev-dpdk.c  |  61 -
 lib/netdev-dpdk.h  |   5 ++
 vswitchd/bridge.c  |   5 ++
 vswitchd/vswitch.ovsschema |   8 ++-
 vswitchd/vswitch.xml   |  49 ++
 10 files changed, 380 insertions(+), 3 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 54a1032..8f6e146 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8db63bf..917ef58 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -32,6 +32,7 @@
 
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "keepalive.h"
 #include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
@@ -477,6 +478,22 @@ dpdk_init(const struct smap *ovs_other_config)
 }
 }
 
+int
+dpdk_ka_init(struct keepalive_info *ka_info)
+{
+/* Initialize keepalive subsystem */
+if ((rte_global_keepalive_info =
+rte_keepalive_create(_failcore_cb, ka_info)) == NULL) {
+VLOG_ERR("Keepalive initialization failed.");
+return -1;
+} else {
+rte_keepalive_register_relay_callback(rte_global_keepalive_info,
+dpdk_ka_update_core_state, ka_info);
+}
+
+return 0;
+}
+
 const char *
 dpdk_get_vhost_sock_dir(void)
 {
diff --git a/lib/dpdk.h b/lib/dpdk.h
index bdbb51b..177624d 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -34,9 +34,11 @@
 #endif /* DPDK_NETDEV */
 
 struct smap;
+struct keepalive_info;
 
 struct rte_keepalive *rte_global_keepalive_info;
 void dpdk_init(const struct smap *ovs_other_config);
+int dpdk_ka_init(struct keepalive_info *ka_info);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..7d1c01c
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,160 @@
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "dpdk.h"
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;/* Keepalive disabled by default */
+static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
+static uint32_t keepalive_timer_interval; /* keepalive timer interval */
+static struct keepalive_info *ka_info = NULL;
+
+inline bool
+ka_is_enabled(void)
+{
+return keepalive_enable;
+}
+
+inline int
+ka_get_pmd_tid(unsigned core_idx)
+{
+int tid = -1;
+if (ka_is_enabled()) {
+tid = ka_info->thread_id[core_idx];
+}
+ovs_assert(tid > 0);
+return tid;
+}
+
+void
+ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
+uint64_t last_alive)
+{
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_

[ovs-dev] [RFC PATCH v3 05/18] dpif-netdev: Add helper function to store datapath tids.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds an API to store the PMD thread ids in the KA info struct.
The thread ids will be used to check for false positives and for status and
statistics reporting.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  3 +++
 lib/keepalive.c   | 13 +
 lib/keepalive.h   |  1 +
 3 files changed, 17 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 2b65dc7..ce141e8 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -48,6 +48,7 @@
 #include "fat-rwlock.h"
 #include "flow.h"
 #include "hmapx.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -3708,6 +3709,8 @@ pmd_thread_main(void *f_)
 
 poll_list = NULL;
 
+ka_store_pmd_id(pmd->core_id);
+
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 747d947..54faf49 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -63,6 +63,19 @@ get_ka_init_status(void)
 return ka_init_status;
 }
 
+void
+ka_store_pmd_id(unsigned core_idx)
+{
+int tid = -1;
+#ifdef DPDK_NETDEV
+tid = rte_sys_gettid();
+#endif
+
+if (ka_is_enabled()) {
+ka_info->thread_id[core_idx] = tid;
+}
+}
+
 /* Register packet processing PMD thread to KA framework. */
 void
 ka_register_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a35b309..67f89da 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -76,6 +76,7 @@ void ka_unregister_pmd_thread(int, unsigned);
 void ka_mark_pmd_thread_alive(void);
 void ka_mark_pmd_thread_sleep(void);
 
+void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
 int ka_get_pmd_tid(unsigned core);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 04/18] keepalive: Add more helper functions to KA framework.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit introduces helper functions in the 'keepalive' module that are
needed to register/unregister PMD threads with the KA framework. It also
introduces APIs to mark the PMD core states.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 49 +
 lib/keepalive.h |  9 +
 2 files changed, 58 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 7d1c01c..747d947 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -50,6 +50,55 @@ ka_get_pmd_tid(unsigned core_idx)
 return tid;
 }
 
+/* Return the Keepalive timer interval. */
+inline uint32_t
+get_ka_interval(void)
+{
+return keepalive_timer_interval;
+}
+
+inline int
+get_ka_init_status(void)
+{
+return ka_init_status;
+}
+
+/* Register packet processing PMD thread to KA framework. */
+void
+ka_register_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
+{
+if (ka_is_enabled()) {
+dpdk_register_pmd_core(core_id);
+}
+}
+
+/* Unregister packet processing PMD thread from KA framework. */
+void
+ka_unregister_pmd_thread(int tid OVS_UNUSED, unsigned core_id)
+{
+if (ka_is_enabled()) {
+dpdk_unregister_pmd_core(core_id);
+}
+}
+
+/* Mark packet processing core alive. */
+inline void
+ka_mark_pmd_thread_alive(void)
+{
+if (ka_is_enabled()) {
+dpdk_mark_pmd_core_alive();
+}
+}
+
+/* Mark packet processing core as idle. */
+inline void
+ka_mark_pmd_thread_sleep(void)
+{
+if (ka_is_enabled()) {
+dpdk_mark_pmd_core_sleep();
+}
+}
+
 void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
 uint64_t last_alive)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index b87b66f..a35b309 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -71,4 +71,13 @@ void ka_set_pmd_state_ts(unsigned, enum keepalive_state, 
uint64_t);
 
 int ka_get_pmd_tid(unsigned core);
 bool ka_is_enabled(void);
+void ka_register_pmd_thread(int, unsigned);
+void ka_unregister_pmd_thread(int, unsigned);
+void ka_mark_pmd_thread_alive(void);
+void ka_mark_pmd_thread_sleep(void);
+
+uint32_t get_ka_interval(void);
+int get_ka_init_status(void);
+int ka_get_pmd_tid(unsigned core);
+
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 16/18] netdev-dpdk: Enable PMD health checks on heartbeat failure.

2017-06-18 Thread Bhanuprakash Bodireddy
The keepalive thread sends heartbeats to each PMD thread; when a PMD fails
to respond to successive heartbeats, the PMD is potentially stalled. The PMD
state transition is as below:

ALIVE -> MISSING -> DEAD -> GONE

This commit enables PMD health checks when a PMD doesn't respond to
heartbeats. This is needed to handle false negatives. With this commit
the new state transition is as below:

ALIVE -> MISSING -> DEAD -> CHECK -> GONE

A PMD health-checking state is introduced that kicks in immediately when
the PMD gets into the DEAD state. As part of this, the following are considered:

  - Link status of the ports polled by PMD thread.
  - Statistics of the ports polled by PMD thread.
  - PMD polling and processing cycles.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 16 ++
 lib/keepalive.h   |  2 ++
 lib/netdev-dpdk.c | 62 +--
 3 files changed, 78 insertions(+), 2 deletions(-)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3690b70..d475ace 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -108,6 +108,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
 pinfo->heartbeats = true;
 pinfo->core_id = core_id;
 pinfo->healthcheck = PMD_HC_DISABLE;
+pinfo->failures = 0;
 
 char *pname = get_process_name(tid);
 if (pname) {
@@ -269,6 +270,21 @@ ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state 
state,
 }
 
 void
+ka_inc_pmd_failures(unsigned core_id)
+{
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if (pinfo->core_id == core_id) {
+pinfo->failures++;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+
+void
 ka_load_process_list(struct hmap **process_list)
 {
 if (ka_is_enabled()) {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 1bd639b..4f30f36 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -64,6 +64,7 @@ struct ka_process_info {
 enum pmdhealth_check healthcheck;
 enum keepalive_state core_state;
 uint64_t core_last_seen_times;
+int failures;
 struct hmap_node node;
 };
 
@@ -127,6 +128,7 @@ void ka_disable_pmd_health_check(unsigned);
 bool ka_is_pmdhealth_check_enabled(unsigned);
 enum pmdhealth_check ka_get_pmd_health_check_state(unsigned);
 void ka_set_pmd_health_check_state(unsigned, enum pmdhealth_check);
+void ka_inc_pmd_failures(unsigned);
 
 void ka_store_pmd_id(unsigned core);
 uint32_t get_ka_interval(void);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f33eeff..f71b017 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -602,6 +602,52 @@ dpdk_failcore_cb(void *ptr_data OVS_UNUSED, const int 
core_id)
 }
 }
 
+static void
+dpdk_ka_handle_failure(enum keepalive_state fail_state, const int core_id,
+   const enum rte_keepalive_state core_state,
+   uint64_t last_alive)
+{
+if (fail_state == KA_STATE_DEAD) {
+/* If process is in DEFUNC/UNINTERRUPTIBLE/TRACED state it is inactive
+ * and no additional health checks are needed. */
+uint32_t tid = ka_get_pmd_tid(core_id);
+if (process_is_active(tid)) {
+   /* Enable PMD health check only when PMD is in 'RUNNING' state and
+* still doesn't respond to heartbeats. Health checks are needed to
+* analyze other stats as we are in penultimate state of declaring
+* PMD as failed. */
+ka_enable_pmd_health_check(core_id);
+}
+ka_set_pmd_state_ts(core_id, KA_STATE_DEAD, last_alive);
+}
+
+if (fail_state == KA_STATE_GONE) {
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+break;
+case PMD_HC_DISABLE:
+VLOG_DBG_RL(, "PMD thread [%d] died, health check disabled",
+core_id);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_state_ts(core_id, KA_STATE_CHECK, last_alive);
+break;
+
+case PMD_HC_COMPLETE:
+ka_inc_pmd_failures(core_id);
+ka_set_pmd_state_ts(core_id, core_state, last_alive);
+ka_disable_pmd_health_check(core_id);
+break;
+
+default:
+VLOG_DBG_RL(, "Unknown health check state %d", pmd_hc_state);
+OVS_NOT_REACHED();
+}
+}
+}
+
 /*
  * This function shall be invoked periodically to write the core status and
  * last seen timestamp of the cores in to keepalive info structure.
@@ -614,11 +660,23 @@ dpdk_ka_update_core_state(void *ptr_data OVS_UNUSED, 
const int core_id,
 case RTE_KA_STATE_ALIVE:
 case RTE_KA_STATE

[ovs-dev] [RFC PATCH v3 17/18] keepalive: Display extended Keepalive status.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support to display the extended keepalive status.
The status can be displayed as follows.

  $ ovs-appctl keepalive/pmd-xstats-show

  keepAlive Status  : Enabled
  keepAlive Interval: 1000 ms

  pmd64
  PMD core_id : 0
  PMD thread id   : 1269 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9123706507798853
  PMD failure count   : 0

  pmd65
  PMD core_id : 1
  PMD thread id   : 1270 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9123706507801627
  PMD failure count   : 0

  pmd64
  PMD core_id : 2
  PMD thread id   : 1271 [ACTIVE]
  PMD heartbeats  : enabled
  PMD state   : ALIVE
  Last seen timestamp : 9125112827794550
  PMD failure count   : 0
  PMD health check: enabled
  Packet Stats
  Port dpdk0, Queue: 1, Link status: up
  rx_packets : 1801284454
  tx_packets : 0
  Cycle Stats
  Polling cycles : 35426111637
  Processing cycles : 10123697085

For the PMD on core 2, health checks are enabled on a heartbeat failure
and additional stats (packet stats, CPU cycles) are displayed as above.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 112 
 1 file changed, 112 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index d475ace..38bff91 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -730,6 +730,116 @@ ka_unixctl_status(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 ds_destroy();
 }
 
+static void
+ka_unixctl_pmd_xstats_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive xstats\n\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (!ka_info) {
+goto out;
+}
+
+ds_put_format(, "PMD threads   : %"PRIu32" \n", ka_info->pmd_cnt);
+
+struct ka_process_info *pinfo, *pinfo_next;
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED ||
+ pinfo->core_state == KA_STATE_SLEEP)
+continue;
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_CHECK:
+state = "HEALTH_CHECK_RUNNING";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+ds_put_format(, "\n");
+ds_put_format(, "  %s\n", pinfo->name);
+ds_put_format(, "\tPMD core_id : %d\n", pinfo->core_id);
+ds_put_format(, "\tPMD thread-id   : %d [%s]\n",
+ pinfo->tid, process_is_active(pinfo->tid) ?
+ "ACTIVE" : "INACTIVE");
+ds_put_format(, "\tPMD heartbeats  : %s\n",
+  ka_is_enabled() ? "enabled" : "disabled");
+ds_put_format(, "\tPMD state   : %s\n", state);
+ds_put_format(, "\tLast seen timestamp : %"PRIu64"\n",
+  pinfo->core_last_seen_times);
+
+ds_put_format(, "\tPMD failure count   : %d\n",
+  pinfo->failures);
+
+int health_check = pinfo->healthcheck;
+if (health_check) {
+ds_put_format(, "\tPMD health check: %s\n",
+  health_check ? "enabled" : "disabled");
+ds_put_format(, "\tPacket Stats\n");
+
+int cid = pinfo->core_id;
+int n = ka_info->ext_stats[cid].num_poll_ports;
+for (int id

[ovs-dev] [PATCH 2/6] netdev-dpdk: Add netdev_dpdk_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds the netdev_dpdk_txq_flush() function. If there are
any packets waiting in the queue, they are transmitted immediately
using the rte_eth_tx_burst function. In the XPS-enabled case, a lock is
taken on the tx queue before flushing the queue.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/netdev-dpdk.c | 31 +--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index cc84539..1e83116 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -293,6 +293,11 @@ struct dpdk_mp {
 struct ovs_list list_node OVS_GUARDED_BY(dpdk_mp_mutex);
 };
 
+/* Queue 'INTERIM_QUEUE_BURST_THRESHOLD' packets before transmitting.
+ * Defaults to 'NETDEV_MAX_BURST'(32) packets.
+ */
+#define INTERIM_QUEUE_BURST_THRESHOLD NETDEV_MAX_BURST
+
 /* There should be one 'struct dpdk_tx_queue' created for
  * each cpu core. */
 struct dpdk_tx_queue {
@@ -302,6 +307,12 @@ struct dpdk_tx_queue {
 * pmd threads (see 'concurrent_txq'). */
 int map;   /* Mapping of configured vhost-user queues
 * to enabled by guest. */
+int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
+  be sent on DPDK tx queue. */
+struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered to amortize the cost of MMIO
+* writes. */
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1897,9 +1908,25 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
  * few packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue.
  */
 static int
-netdev_dpdk_txq_flush(struct netdev *netdev OVS_UNUSED,
-  int qid OVS_UNUSED, bool concurrent_txq OVS_UNUSED)
+netdev_dpdk_txq_flush(struct netdev *netdev,
+  int qid, bool concurrent_txq)
 {
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->dpdk_pkt_cnt)) {
+if (OVS_UNLIKELY(concurrent_txq)) {
+qid = qid % dev->up.n_txq;
+rte_spinlock_lock(>tx_q[qid].tx_lock);
+}
+
+netdev_dpdk_eth_tx_burst(dev, qid, txq->dpdk_burst_pkts,
+ txq->dpdk_pkt_cnt);
+
+if (OVS_UNLIKELY(concurrent_txq)) {
+rte_spinlock_unlock(>tx_q[qid].tx_lock);
+}
+}
 return 0;
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/6] netdev: Add netdev_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
Add netdev_txq_flush(), which flushes packets on a queue. This is needed
to transmit packets held in the intermediate queue.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Signed-off-by: Markus Magnusson <markus.magnus...@ericsson.com>
Co-authored-by: Markus Magnusson <markus.magnus...@ericsson.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/netdev-bsd.c  |  1 +
 lib/netdev-dpdk.c | 26 +-
 lib/netdev-dummy.c|  1 +
 lib/netdev-linux.c|  1 +
 lib/netdev-provider.h |  8 
 lib/netdev-vport.c|  2 +-
 lib/netdev.c  |  9 +
 lib/netdev.h  |  1 +
 8 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index f863a18..cb0edd6 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1548,6 +1548,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_bsd_rxq_recv, \
 netdev_bsd_rxq_wait, \
 netdev_bsd_rxq_drain,\
+NULL,\
  \
 NO_OFFLOAD_API   \
 }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bba4de3..cc84539 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1892,6 +1892,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 }
 }
 
+/* Flush tx queues
+ * This is done periodically to empty the intermediate queue in case of
+ * few packets (< INTERIM_QUEUE_BURST_THRESHOLD) buffered in the queue.
+ */
+static int
+netdev_dpdk_txq_flush(struct netdev *netdev OVS_UNUSED,
+  int qid OVS_UNUSED, bool concurrent_txq OVS_UNUSED)
+{
+return 0;
+}
+
 static int
 netdev_dpdk_eth_send(struct netdev *netdev, int qid,
  struct dp_packet_batch *batch, bool may_steal,
@@ -3241,7 +3252,7 @@ unlock:
   SET_CONFIG, SET_TX_MULTIQ, SEND,\
   GET_CARRIER, GET_STATS, \
   GET_FEATURES, GET_STATUS,   \
-  RECONFIGURE, RXQ_RECV)  \
+  RECONFIGURE, RXQ_RECV, TXQ_FLUSH)   \
 { \
 NAME, \
 true,   /* is_pmd */  \
@@ -3308,6 +3319,7 @@ unlock:
 RXQ_RECV, \
 NULL,   /* rx_wait */ \
 NULL,   /* rxq_drain */   \
+TXQ_FLUSH,  /* txq_flush */   \
 NO_OFFLOAD_API\
 }
 
@@ -3325,7 +3337,8 @@ static const struct netdev_class dpdk_class =
 netdev_dpdk_get_features,
 netdev_dpdk_get_status,
 netdev_dpdk_reconfigure,
-netdev_dpdk_rxq_recv);
+netdev_dpdk_rxq_recv,
+netdev_dpdk_txq_flush);
 
 static const struct netdev_class dpdk_ring_class =
 NETDEV_DPDK_CLASS(
@@ -3341,7 +3354,8 @@ static const struct netdev_class dpdk_ring_class =
 netdev_dpdk_get_features,
 netdev_dpdk_get_status,
 netdev_dpdk_reconfigure,
-netdev_dpdk_rxq_recv);
+netdev_dpdk_rxq_recv,
+NULL);
 
 static const struct netdev_class dpdk_vhost_class =
 NETDEV_DPDK_CLASS(
@@ -3357,7 +3371,8 @@ static const struct netdev_class dpdk_vhost_class =
 NULL,
 NULL,
 netdev_dpdk_vhost_reconfigure,
-netdev_dpdk_vhost_rxq_recv);
+netdev_dpdk_vhost_rxq_recv,
+NULL);
 static const struct netdev_class dpdk_vhost_client_class =
 NETDEV_DPDK_CLASS(
 "dpdkvhostuserclient",
@@ -3372,7 +3387,8 @@ static const struct netdev_class dpdk_vhost_client_class =
 NULL,
 NULL,
 netdev_dpdk_vhost_client_reconfigure,
-netdev_dpdk_vhost_rxq_recv);
+netdev_dpdk_vhost_rxq_recv,
+NULL);
 
 void
 netdev_dpdk_register(void)
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index d189a86..216c98e 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1414,6 +1414,7 @@ netdev_dummy_update_flags(struct netdev *netdev_,
 netdev_dummy_rxq_recv,  \
 netdev_dummy_rxq_wait,  \
 netdev_dummy_rxq_drain, \
+NULL,   \
 \
 NO_OFFLOAD_API  \
 }
diff --git a/lib/netdev-linux.c b

[ovs-dev] [PATCH 0/6 V2] netdev-dpdk: Use intermediate queue during packet transmission.

2017-06-18 Thread Bhanuprakash Bodireddy
After packet classification, packets are queued into batches depending
on the matching netdev flow. Thereafter each batch is processed to
execute the related actions. This becomes particularly inefficient if
there are few packets in each batch, as rte_eth_tx_burst() incurs expensive
MMIO writes.

This patch series implements an intermediate queue for DPDK and vHost User
ports. Packets are queued and sent as a burst when the packet count reaches
the threshold. Drain logic is also implemented to handle cases where packets
can get stuck in the tx queues under low-rate traffic conditions. Care has
been taken to see that latency stays well within acceptable limits. Testing
shows significant performance gains with this implementation.

This patch series combines the two earlier patches posted below.
  DPDK patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html
  vHost User patch: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html

Also, this series proposes to disable the retries on vHost User ports and
make this configurable via ovsdb (controversial?).

Performance Numbers with intermediate queue:

  DPDK ports
 ===

  Throughput for P2P scenario, for two 82599ES 10G port with 64 byte packets

  Number
  flows       MASTER    With PATCH
  ======    ========    ==========
      10    10727283      13393844
      32     7042253      11228799
      50     7515491       9607791
     100     5838699       9430730
     500     5285066       7845807
    1000     5226477       7135601

   Latency test

   MASTER
   ===
   Pkt size  min(ns)  avg(ns)  max(ns)
   512         4,631    5,022  309,914
   1024        5,545    5,749  104,294
   1280        5,978    6,159   45,306
   1518        6,419    6,774  946,850

   PATCH
   =
   Pkt size  min(ns)  avg(ns)  max(ns)
   512         4,711    5,064  182,477
   1024        5,601    5,888  701,654
   1280        6,018    6,491  533,037
   1518        6,467    6,734  312,471

   vHost User ports
  ==

  Throughput for PV scenario, with 64 byte packets

   Number
   flows       MASTER    With PATCH
   ======    ========    ==========
       10     5945899       7833914
       32     3872211       6530133
       50     3283713       6618711
      100     3132540       5857226
      500     2964499       5273006
     1000     2931952       5178038

  Latency test.

  MASTER
  ===
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,011   12,100   281,915
  1024       7,870    9,313   193,116
  1280       7,862    9,036   194,439
  1518       8,215    9,417   204,782

  PATCH
  ===
  Pkt size  min(ns)  avg(ns)  max(ns)
   512      10,492   13,655   281,538
  1024       8,407    9,784   205,095
  1280       8,399    9,750   194,888
  1518       8,367    9,722   196,973

Performance numbers reported by Eelco Chaudron at
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-June/333949.html
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-April/331039.html

---
v1->v2
  * Rebased on master due to HW offload changes.
  * Introduced union for packet count and buffers and changed the variable
names appropriately.
  * No functional changes.

Bhanuprakash Bodireddy (6):
  netdev: Add netdev_txq_flush function.
  netdev-dpdk: Add netdev_dpdk_txq_flush function.
  netdev-dpdk: Add intermediate queue support.
  dpif-netdev: Flush the packets in intermediate queue.
  netdev-dpdk: Add netdev_dpdk_vhost_txq_flush function.
  netdev-dpdk: Enable intermediate queue for vHost User port.

 lib/dpif-netdev.c |  49 -
 lib/netdev-bsd.c  |   1 +
 lib/netdev-dpdk.c | 194 ++
 lib/netdev-dummy.c|   1 +
 lib/netdev-linux.c|   1 +
 lib/netdev-provider.h |   8 +++
 lib/netdev-vport.c|   2 +-
 lib/netdev.c  |   9 +++
 lib/netdev.h  |   1 +
 9 files changed, 235 insertions(+), 31 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 01/18] dpdk: Add helper functions for DPDK datapath keepalive.

2017-06-18 Thread Bhanuprakash Bodireddy
Introduce helper functions in 'dpdk' module that are needed for
DPDK keepalive functionality. Also add dummy functions in 'dpdk-stub' module
that are needed when DPDK datapath is not available.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpdk-stub.c | 24 
 lib/dpdk.c  | 31 +++
 lib/dpdk.h  | 10 ++
 3 files changed, 65 insertions(+)

diff --git a/lib/dpdk-stub.c b/lib/dpdk-stub.c
index daef729..d7fb19b 100644
--- a/lib/dpdk-stub.c
+++ b/lib/dpdk-stub.c
@@ -48,3 +48,27 @@ dpdk_get_vhost_sock_dir(void)
 {
 return NULL;
 }
+
+void
+dpdk_register_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing */
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core_id OVS_UNUSED)
+{
+/* Nothing */
+}
+
+void
+dpdk_mark_pmd_core_alive(void)
+{
+/* Nothing */
+}
+
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+/* Nothing */
+}
diff --git a/lib/dpdk.c b/lib/dpdk.c
index 8da6c32..8db63bf 100644
--- a/lib/dpdk.c
+++ b/lib/dpdk.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 
+#include 
 #include 
 #include 
 #ifdef DPDK_PDUMP
@@ -489,3 +490,33 @@ dpdk_set_lcore_id(unsigned cpu)
 ovs_assert(cpu != NON_PMD_CORE_ID);
 RTE_PER_LCORE(_lcore_id) = cpu;
 }
+
+/* Register packet processing core 'core_id' for liveness checks. */
+void
+dpdk_register_pmd_core(unsigned core)
+{
+rte_keepalive_register_core(rte_global_keepalive_info, core);
+}
+
+void
+dpdk_unregister_pmd_core(unsigned core OVS_UNUSED)
+{
+/* XXX: DPDK unfortunately hasn't implemented unregister API
+ * This will be fixed later, instead use sleep API now.
+ */
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
+
+/* Mark packet processing core alive. */
+void
+dpdk_mark_pmd_core_alive(void)
+{
+rte_keepalive_mark_alive(rte_global_keepalive_info);
+}
+
+/* Mark packet processing core as idle. */
+void
+dpdk_mark_pmd_core_sleep(void)
+{
+rte_keepalive_mark_sleep(rte_global_keepalive_info);
+}
diff --git a/lib/dpdk.h b/lib/dpdk.h
index 673a1f1..bdbb51b 100644
--- a/lib/dpdk.h
+++ b/lib/dpdk.h
@@ -17,6 +17,7 @@
 #ifndef DPDK_H
 #define DPDK_H
 
+#include 
 #ifdef DPDK_NETDEV
 
 #include 
@@ -26,14 +27,23 @@
 
 #else
 
+#include 
+
 #define NON_PMD_CORE_ID UINT32_MAX
 
 #endif /* DPDK_NETDEV */
 
 struct smap;
 
+struct rte_keepalive *rte_global_keepalive_info;
 void dpdk_init(const struct smap *ovs_other_config);
 void dpdk_set_lcore_id(unsigned cpu);
 const char *dpdk_get_vhost_sock_dir(void);
 
+/* Keepalive APIs */
+void dpdk_register_pmd_core(unsigned core_id);
+void dpdk_unregister_pmd_core(unsigned core_id);
+void dpdk_mark_pmd_core_alive(void);
+void dpdk_mark_pmd_core_sleep(void);
+
 #endif /* dpdk.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 02/18] process: Add helper functions to retrieve process related info.

2017-06-18 Thread Bhanuprakash Bodireddy
Implement helper functions to retrieve the process status, the process name,
and the last core on which the process was scheduled. The APIs will be used
by the keepalive monitoring framework in future commits.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 152 ++
 lib/process.h |  13 +
 2 files changed, 165 insertions(+)

diff --git a/lib/process.c b/lib/process.c
index e9d0ba9..4c029c1 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -50,6 +50,20 @@ struct process {
 int status;
 };
 
+struct pstate2Num {
+char *pidstate;
+int num;
+};
+
+const struct pstate2Num pstate_map[] = {
+{ "S", STOPPED_STATE },
+{ "R", ACTIVE_STATE },
+{ "t", TRACED_STATE },
+{ "Z", DEFUNC_STATE },
+{ "D", UNINTERRUPTIBLE_SLEEP_STATE },
+{ "NULL", UNUSED_STATE },
+};
+
 /* Pipe used to signal child termination. */
 static int fds[2];
 
@@ -390,6 +404,144 @@ process_run(void)
 #endif
 }
 
+int
+get_process_status(int pid, int *pstate)
+{
+#ifdef __linux__
+static char process_name[20];
+FILE *stream;
+char line[75];
+char Name[15], value[5], status[20];
+int i, ln;
+
+snprintf(process_name, sizeof(process_name),
+ "/proc/%d/status", pid);
+stream = fopen(process_name, "r");
+if (stream == NULL) {
+VLOG_WARN_ONCE("%s: open failed: %s", process_name,
+ovs_strerror(errno));
+return errno;
+}
+
+ln=0;
+while (fgets(line, sizeof line, stream)) {
+if (!ovs_scan(line,
+  "%6s %2s %14s\n",
+   Name, value, status)) {
+VLOG_WARN_ONCE("%s: could not parse line %d: %s",
+process_name, ln, line);
+continue;
+}
+if (!strcmp(Name, "State:")) {
+for (i=0; pstate_map[i].pidstate != NULL; i++) {
+if (strcmp(pstate_map[i].pidstate, value) == 0) {
+VLOG_WARN_ONCE("The state is %s, status is %d\n",
+pstate_map[i].pidstate, pstate_map[i].num);
+*pstate = pstate_map[i].num;
+break;
+}
+}
+break;
+}
+ln++;
+   }
+   return 0;
+#else
+   return ENOSYS;
+#endif
+}
+
+bool
+process_is_active(int pid)
+{
+#ifdef __linux__
+int pstate;
+int err = get_process_status(pid, );
+if (!err) {
+if (pstate == ACTIVE_STATE) {
+return true;
+}
+}
+return false;
+#else
+   return false;
+#endif
+}
+
+char *
+get_process_name(int pid)
+{
+#ifdef __linux__
+static char proc_path[PATH_MAX];
+FILE *stream;
+char line[20];
+char *pname = xmalloc(20);
+
+if (pid == -1) {
+   VLOG_ERR("Invalid process id : %d", pid);
+   return NULL;
+}
+
+snprintf(proc_path, sizeof(proc_path),
+ "/proc/%d/task/%d/comm", pid, pid);
+stream = fopen(proc_path, "r");
+if (!stream) {
+VLOG_WARN("%s: open failed: %s", proc_path, ovs_strerror(errno));
+return NULL;
+}
+
+if (fgets(line, sizeof line, stream) != NULL) {
+if (ovs_scan(line, "%s", pname)) {
+return pname;
+}
+}
+return NULL;
+#else
+return NULL;
+#endif
+}
+
+/* Retrieve the last core id that executed the process.
+ *
+ * Refer http://man7.org/linux/man-pages/man5/proc.5.html
+ * and the processor field in /proc/[pid]/stat.
+ */
+int
+get_cpu_num(int pid)
+{
+#ifdef __linux__
+static char proc_path[PATH_MAX];
+FILE *stream;
+char line[500];
+
+snprintf(proc_path, sizeof(proc_path),
+ "/proc/%d/stat", pid);
+stream = fopen(proc_path, "r");
+if (!stream) {
+VLOG_WARN_ONCE("%s: open failed: %s", proc_path, ovs_strerror(errno));
+return errno;
+}
+
+int i;
+int cpu_id = -1;
+if (fgets(line, sizeof line, stream) != NULL) {
+char *tok, *endptr = NULL;
+for (tok = strtok_r(line, " ", ), i = 1; tok != NULL;
+tok = strtok_r(NULL, " ", ), i++) {
+VLOG_DBG("token :%s", tok);
+if (i == 39) {
+cpu_id = atoi(tok);
+break;
+}
+}
+}
+
+ovs_assert(cpu_id >= 0)
+return cpu_id;
+#else
+return ENOSYS;
+#endif
+}
 
 /* Causes the next call to poll_block() to wake up when process 'p' has
  * exited. */
diff --git a/lib/process.h b/lib/process.h
index 3feac7e..041767d 100644
--- a/lib/process.h
+++ b/lib/process.h
@@ -20,6 +20,15 @@
 #include 
 #include 
 
+enum process_states {
+UNUSED_STATE,
+STOPPED_STATE,
+ACTIVE_STATE,
+TRACED_STATE,
+DEFUNC_STATE,
+UNINTERRUPTIBLE_

[ovs-dev] [RFC PATCH v3 00/18] Add OVS DPDK keep-alive functionality

2017-06-18 Thread Bhanuprakash Bodireddy
The keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing cores (PMD thread cores) by dispatching heartbeats at regular
intervals. In case of missed heartbeats, additional health checks are
enabled on the PMD thread to detect the failure, and the failure shall be
reported to higher-level fault management systems/frameworks.

The implementation uses OVSDB for reporting the datapath status and the
health of the PMD threads. Any external monitoring application can read
the status from OVSDB at regular intervals (or) subscribe to the updates
in OVSDB so that they get notified when the changes happen on OVSDB.

keepalive info struct is created and initialized for storing the
status of the PMD threads. This is initialized by main thread(vswitchd)
as part of init process and will be periodically updated by 'keepalive'
thread. keepalive feature can be enabled through below OVSDB settings.

enable-keepalive=true
  - Keepalive feature is disabled by default.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, an 'ovs-keepalive' thread shall be spawned that wakes
up at regular intervals to update the timestamp and status of the PMD cores
in the keepalive info struct. The vswitchd thread shall read this information
and write the status into the 'keepalive' column of the Open_vSwitch table
in OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, the collectd shall be notified and will eventually relay the status
to ceilometer service running in the controller. Below is the high level
overview of deployment model.

Compute Node       Controller       Compute Node

Collectd  <-->     Ceilometer  <-->     Collectd

OvS DPDK                            OvS DPDK

+-+
| VM  |
+--+--+
\---+---/
|
+--+---+   ++--+ +--+---+
| OVS  |-> |   ovsevents plugin| --> |   collectd   |
+--+---+   ++--+ +--+---+

+--+-+ +---++ |
| Ceilometer | <-- | collectd ceilometer plugin |  <---
+--+-+ +---++

Performance impact:
  No noticeable performance or latency impact is observed with
  KA feature enabled.

-
v2-> v3
  * Remove POSIX shared memory block implementation (suggested by Aaron).
  * Rework the logic to register and track threads instead of cores. This way
in the future any thread can be registered to KA framework. For now only PMD
threads are tracked (suggested by Aaron).
  * Refactor few APIs and further clean up the code.
   
v1-> v2
  * Merged the xml and schema commits to later commit where the actual
implementation is done(suggested by Ben).
  * Fix ovs-appctl keepalive/* hang issue when KA disabled.
  * Fixed memory leaks with appctl commands for keepalive/pmd-health-show,
pmd-xstats-show.
  * Refactored code and fixed APIs dealing with PMD health monitoring.

Bhanuprakash Bodireddy (18):
[9] patches help update OVSDB with keepalive status

  dpdk: Add helper functions for DPDK datapath keepalive.
  process: Add helper functions to retrieve process related info.
  Keepalive: Add initial keepalive support.
  keepalive: Add more helper functions to KA framework.
  dpif-netdev: Add helper function to store datapath tids.
  dpif-netdev: Register packet processing cores to KA framework.
  dpif-netdev: Enable heartbeats for DPDK datapath.
  keepalive: Retrieve PMD status periodically.
  bridge: Update keepalive status in OVSDB

  keepalive: Add support to query keepalive statistics.
  keepalive: Add support to query keepalive status.

[5] Patches add additional health checks in case of heartbeat failure.

  dpif-netdev: Add additional datapath health checks.
  keepalive: Check the link status as part of PMD health checks.
  keepalive: Check the packet statistics as part of PMD health checks.
  keepalive: Check the PMD cycle stats as part of PMD health checks.
  netdev-dpdk: Enable PMD health checks on heartbeat failure.
  keepalive: Display extended Keepalive status.
  Documentation: Update DPDK doc with Keepalive feature.

 Documentation/howto/dpdk.rst |  90 +
 lib/automake.mk  |   2 +
 lib/dpdk-stub.c  |  30 ++
 lib/dpdk.c   |  55 +++
 lib/dpdk.h   |  14 +
 lib/dpif-netdev.c| 203 +-
 lib/dpif-netdev.h|   6 +
 lib/keepalive.c  | 917 +++
 lib/keepalive.h  | 150 +++
 lib/netdev-dpdk.c| 119 +-
 lib/netdev-dpdk.h|   5 +
 lib/pro

[ovs-dev] [RFC PATCH v3 12/18] dpif-netdev: Add additional datapath health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit enables additional datapath health checks. The checks
are enabled only on a PMD heartbeat failure. After three successive
missed heartbeats, additional health checks need to be performed on the
respective PMD thread to confirm the failure.

The datapath health is monitored periodically from keepalive thread.
It should be noted that the PMD health checks are only performed on
the PMD threads whose health check is enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 30 +
 lib/keepalive.c   | 81 +++
 lib/keepalive.h   | 16 +++
 3 files changed, 127 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 93bda20..06d2e23 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -971,6 +971,35 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void
+pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+{
+/* Nothing */
+}
+
+static void
+get_datapath_health(struct dp_netdev *dp)
+{
+static struct hmap *process_list = NULL;
+if (!process_list) {
+ka_load_process_list(_list);
+}
+
+struct ka_process_info *pinfo;
+HMAP_FOR_EACH (pinfo, node, process_list) {
+int core_id = pinfo->core_id;
+struct dp_netdev_pmd_thread *pmd;
+
+/* Check only PMD threads whose health check is enabled. */
+if (OVS_LIKELY(pinfo->healthcheck == PMD_HC_DISABLE)) {
+continue;
+}
+
+pmd = dp_netdev_get_pmd(dp, core_id);
+pmd_health_check(pmd);
+}
+}
+
 static void *
 ovs_keepalive(void *f_)
 {
@@ -982,6 +1011,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_datapath_health(dp);
 get_ka_stats();
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 6edb440..997bebf 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -105,6 +105,7 @@ ka_register_thread(int tid, bool thread_is_pmd)
 pinfo->tid = tid;
 pinfo->heartbeats = true;
 pinfo->core_id = core_id;
+pinfo->healthcheck = PMD_HC_DISABLE;
 
 char *pname = get_process_name(tid);
 if (pname) {
@@ -176,6 +177,78 @@ ka_mark_pmd_thread_sleep(void)
 }
 
 void
+ka_enable_pmd_health_check(unsigned core_id)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = PMD_HC_ENABLE;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+void
+ka_disable_pmd_health_check(unsigned core_id)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = PMD_HC_DISABLE;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+enum pmdhealth_check
+ka_get_pmd_health_check_state(unsigned core_id)
+OVS_REQUIRES(ka_info->proclist_mutex)
+{
+int hc = PMD_HC_DISABLE;
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+hc = pinfo->healthcheck;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+
+return hc;
+}
+
+void
+ka_set_pmd_health_check_state(unsigned core_id, enum pmdhealth_check state)
+{
+if (ka_is_enabled()) {
+struct ka_process_info *pinfo;
+int tid = ka_get_pmd_tid(core_id);
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info->process_list) {
+if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+pinfo->healthcheck = state;
+}
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+}
+}
+
+void
 ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
 uint64_t last_alive)
 {
@@ -193,6 +266,14 @@ ka_set_pmd_state_ts(

[ovs-dev] [RFC PATCH v3 11/18] keepalive: Add support to query keepalive status.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support to query if keepalive status is
enabled/disabled.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index a4b8d01..6edb440 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -420,6 +420,19 @@ out:
 ds_destroy();
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 ka_init__(void)
 {
@@ -466,6 +479,8 @@ ka_init(const struct smap *ovs_other_config)
 
 unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
   ka_unixctl_pmd_health_show, ka_info);
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
 
 ovsthread_once_done(_enable);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/6] netdev-dpdk: Add netdev_dpdk_vhost_txq_flush function.

2017-06-18 Thread Bhanuprakash Bodireddy
Add netdev_dpdk_vhost_txq_flush(), which flushes packets on vHost User
port queues. Also add the netdev_dpdk_vhost_tx_burst() function, which
uses rte_vhost_enqueue_burst() to enqueue a burst of packets on vHost User
ports.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/netdev-dpdk.c | 76 ---
 1 file changed, 72 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 50a9a2c..47343e8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -307,12 +307,22 @@ struct dpdk_tx_queue {
 * pmd threads (see 'concurrent_txq'). */
 int map;   /* Mapping of configured vhost-user queues
 * to enabled by guest. */
-int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
+union {
+int dpdk_pkt_cnt;  /* Number of buffered packets waiting to
   be sent on DPDK tx queue. */
-struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+int vhost_pkt_cnt; /* Number of buffered packets waiting to
+  be sent on vhost port. */
+};
+
+union {
+struct rte_mbuf *dpdk_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
/* Intermediate queue where packets can
 * be buffered to amortize the cost of MMIO
 * writes. */
+struct dp_packet *vhost_burst_pkts[INTERIM_QUEUE_BURST_THRESHOLD];
+   /* Intermediate queue where packets can
+* be buffered for vhost ports. */
+};
 };
 
 /* dpdk has no way to remove dpdk ring ethernet devices
@@ -1719,6 +1729,63 @@ netdev_dpdk_vhost_update_tx_counters(struct netdev_stats 
*stats,
 }
 }
 
+static int
+netdev_dpdk_vhost_tx_burst(struct netdev_dpdk *dev, int qid)
+{
+struct dpdk_tx_queue *txq = >tx_q[qid];
+struct rte_mbuf **cur_pkts = (struct rte_mbuf **)txq->vhost_burst_pkts;
+
+int tx_vid = netdev_dpdk_get_vid(dev);
+int tx_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
+uint32_t sent = 0;
+uint32_t retries = 0;
+uint32_t sum, total_pkts;
+
+total_pkts = sum = txq->vhost_pkt_cnt;
+do {
+uint32_t ret;
+ret = rte_vhost_enqueue_burst(tx_vid, tx_qid, _pkts[sent], sum);
+if (OVS_UNLIKELY(!ret)) {
+/* No packets enqueued - do not retry. */
+break;
+} else {
+/* Packet have been sent */
+sent += ret;
+
+/* 'sum' packet have to be retransmitted */
+sum -= ret;
+}
+} while (sum && (retries++ < VHOST_ENQ_RETRY_NUM));
+
+for (int i = 0; i < total_pkts; i++) {
+dp_packet_delete(txq->vhost_burst_pkts[i]);
+}
+
+/* Reset pkt count */
+txq->vhost_pkt_cnt = 0;
+
+/* 'sum' refers to packets dropped */
+return sum;
+}
+
+/* Flush the txq if there are any packets available.
+ * dynamic_txqs/concurrent_txq is disabled for vHost User ports as
+ * 'OVS_VHOST_MAX_QUEUE_NUM' txqs are preallocated.
+ */
+static int
+netdev_dpdk_vhost_txq_flush(struct netdev *netdev, int qid,
+bool concurrent_txq OVS_UNUSED)
+{
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+struct dpdk_tx_queue *txq = >tx_q[qid];
+
+if (OVS_LIKELY(txq->vhost_pkt_cnt)) {
+netdev_dpdk_vhost_tx_burst(dev, qid);
+}
+
+return 0;
+}
+
 static void
 __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
  struct dp_packet **pkts, int cnt)
@@ -3432,7 +3499,8 @@ static const struct netdev_class dpdk_vhost_class =
 NULL,
 netdev_dpdk_vhost_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_flush);
+
 static const struct netdev_class dpdk_vhost_client_class =
 NETDEV_DPDK_CLASS(
 "dpdkvhostuserclient",
@@ -3448,7 +3516,7 @@ static const struct netdev_class dpdk_vhost_client_class =
 NULL,
 netdev_dpdk_vhost_client_reconfigure,
 netdev_dpdk_vhost_rxq_recv,
-NULL);
+netdev_dpdk_vhost_txq_flush);
 
 void
 netdev_dpdk_register(void)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/6] netdev-dpdk: Enable intermediate queue for vHost User port.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit refactors __netdev_dpdk_vhost_send() and enables the
intermediate queue, wherein packets are buffered until the threshold
'INTERIM_QUEUE_BURST_THRESHOLD' [32] is hit and are then transmitted.

This commit improves the throughput as reported below in simple Physical
to virtual testcase with higher flows @10G Line rate.

  Num Flow    Master      Commit
  ========    =======     =======
  10          5945899     7833914
  32          3872211     6530133
  50          3283713     6618711
  100         3132540     5857226
  500         2964499     5273006
  1000        2931952     5178038

Latency stats:

  MASTER
  ---
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       10,011   12,100   281,915
  1024       7,870    9,313   193,116
  1280       7,862    9,036   194,439
  1518       8,215    9,417   204,782

  MASTER + COMMIT
  ---
  Pkt size  min(ns)  avg(ns)  max(ns)
  512       10,492   13,655   281,538
  1024       8,407    9,784   205,095
  1280       8,399    9,750   194,888
  1518       8,367    9,722   196,973

Reported-at: https://mail.openvswitch.org/pipermail/ovs-dev/2017-May/332271.html
 [By Eelco Chaudron <echau...@redhat.com>]
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Signed-off-by: Antonio Fischetti <antonio.fische...@intel.com>
Co-authored-by: Antonio Fischetti <antonio.fische...@intel.com>
Acked-by: Eelco Chaudron <echau...@redhat.com>
---
 lib/netdev-dpdk.c | 38 +++---
 1 file changed, 15 insertions(+), 23 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 47343e8..69cc5ff 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1794,16 +1794,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 struct rte_mbuf **cur_pkts = (struct rte_mbuf **) pkts;
 unsigned int total_pkts = cnt;
 unsigned int dropped = 0;
-int i, retries = 0;
+int i;
 
 qid = dev->tx_q[qid % netdev->n_txq].map;
+struct dpdk_tx_queue *txq = >tx_q[qid];
 
 if (OVS_UNLIKELY(!is_vhost_running(dev) || qid < 0
  || !(dev->flags & NETDEV_UP))) {
 rte_spinlock_lock(>stats_lock);
 dev->stats.tx_dropped+= cnt;
 rte_spinlock_unlock(>stats_lock);
-goto out;
+
+for (i = 0; i < total_pkts; i++) {
+dp_packet_delete(pkts[i]);
+}
+return;
 }
 
 rte_spinlock_lock(>tx_q[qid].tx_lock);
@@ -1813,34 +1818,21 @@ __netdev_dpdk_vhost_send(struct netdev *netdev, int qid,
 cnt = netdev_dpdk_qos_run(dev, cur_pkts, cnt);
 dropped = total_pkts - cnt;
 
-do {
-int vhost_qid = qid * VIRTIO_QNUM + VIRTIO_RXQ;
-unsigned int tx_pkts;
-
-tx_pkts = rte_vhost_enqueue_burst(netdev_dpdk_get_vid(dev),
-  vhost_qid, cur_pkts, cnt);
-if (OVS_LIKELY(tx_pkts)) {
-/* Packets have been sent.*/
-cnt -= tx_pkts;
-/* Prepare for possible retry.*/
-cur_pkts = _pkts[tx_pkts];
-} else {
-/* No packets sent - do not retry.*/
-break;
+int idx = 0;
+while (idx < cnt) {
+txq->vhost_burst_pkts[txq->vhost_pkt_cnt++] = pkts[idx++];
+
+if (txq->vhost_pkt_cnt >= INTERIM_QUEUE_BURST_THRESHOLD) {
+dropped += netdev_dpdk_vhost_tx_burst(dev, qid);
 }
-} while (cnt && (retries++ <= VHOST_ENQ_RETRY_NUM));
+}
 
 rte_spinlock_unlock(>tx_q[qid].tx_lock);
 
 rte_spinlock_lock(>stats_lock);
 netdev_dpdk_vhost_update_tx_counters(>stats, pkts, total_pkts,
- cnt + dropped);
+ dropped);
 rte_spinlock_unlock(>stats_lock);
-
-out:
-for (i = 0; i < total_pkts - dropped; i++) {
-dp_packet_delete(pkts[i]);
-}
 }
 
 /* Tx function. Transmit packets indefinitely */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC PATCH v3 13/18] keepalive: Check the link status as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds initial support for performing PMD health checks.
The ports handled by the PMD threads are checked for their link status, and
the result is recorded in the keepalive info structure.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 37 +++--
 lib/keepalive.c   | 52 
 lib/keepalive.h   | 18 +-
 3 files changed, 104 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 06d2e23..4d8d3e7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -51,6 +51,7 @@
 #include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
+#include "netdev-provider.h"
 #include "netdev-vport.h"
 #include "netlink.h"
 #include "odp-execute.h"
@@ -972,9 +973,41 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void
-pmd_health_check(struct dp_netdev_pmd_thread *pmd OVS_UNUSED)
+pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-/* Nothing */
+int port_link_status = 0;
+struct rxq_poll *poll;
+
+struct svec pmd_poll_list;
+svec_init(_poll_list);
+HMAP_FOR_EACH (poll, node, >poll_list) {
+svec_add(_poll_list, netdev_rxq_get_name(poll->rxq->rx));
+}
+
+/* With MQ enabled, remove the duplicates. */
+svec_sort_unique(_poll_list);
+
+const char *port_name;
+int i = 0;
+SVEC_FOR_EACH (i, port_name, _poll_list) {
+struct netdev *dev = netdev_from_name(port_name);
+if (dev) {
+char *link_state = netdev_get_carrier(dev) ? "up" : "down";
+ka_info_update_port_status(port_name, 0, link_state,
+pmd->core_id, i);
+netdev_close(dev);
+}
+}
+svec_destroy(_poll_list);
+
+port_link_status = ka_get_polled_ports_status(pmd->core_id);
+
+int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
+if (PMD_HC_COMPLETE == pmd_hc_state) {
+if (port_link_status == ACTIVE_RUN_STATE) {
+ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
+}
+}
 }
 
 static void
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 997bebf..9251849 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -428,6 +428,58 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+enum pmdhealth_status ka_get_polled_ports_status(unsigned core_id)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return FAILURE_STATE;
+}
+
+int failed = 0;
+int n_ports = ka_info->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_info->ext_stats[core_id].port_stats[i].state[PORT_LINK_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
+void
+ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
+   char *link_state, int core_id, int idx)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return;
+}
+
+ka_info->ext_stats[core_id].num_poll_ports = idx;
+
+if (OVS_LIKELY(core_id != NON_PMD_CORE_ID)) {
+ka_info->ext_stats[core_id].port_stats[idx].port = port;
+ka_info->ext_stats[core_id].port_stats[idx].link_state =
+   link_state;
+}
+
+int state;
+if (!strcmp(link_state, "down")) {
+state = FAILURE_STATE;
+} else {
+state = ACTIVE_RUN_STATE;
+}
+
+ka_info->ext_stats[core_id].port_stats[idx].state[PORT_LINK_CHECK] =
+   state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 8877ca6..69697bd 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -47,6 +47,12 @@ enum pmdhealth_check {
 PMD_HC_COMPLETE
 };
 
+enum port_health_check {
+PORT_LINK_CHECK = 0,
+PORT_STATS_CHECK,
+PORT_NUM_CHECKS
+};
+
 struct ka_process_info {
 char name[16];
 int tid;
@@ -60,10 +66,12 @@ struct ka_process_info {
 
 struct poll_port_stats {
 const char *port;
-int qid;
+char *link_state;
+int state[PORT_NUM_CHECKS];
 };
 
 struct pmd_extended_stats {
+char *health_status;
 struct poll_port_stats *port_stats;
 int num_poll_ports;
 };
@@ -92,6 +100,11 @@ enum keepalive_status {
 ka_init_success
 };
 
+enum pmdhealth_status {
+FAILURE_STATE = 0,
+ACTIVE_RUN_STATE
+};
+
 void ka_init(const struct smap *);
 void ka_destroy(void);
 void ka_set_pmd_state_ts(unsigned, enum keepalive

[ovs-dev] [RFC PATCH v3 15/18] keepalive: Check the PMD cycle stats as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support for checking the PMD cycle stats. If the cycles
do not change over a period of time, this can be flagged as a possible
PMD stall.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 16 +---
 lib/dpif-netdev.h |  6 ++
 lib/keepalive.c   | 51 +++
 lib/keepalive.h   |  3 +++
 4 files changed, 69 insertions(+), 7 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ad48ee5..b1a9fc4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -328,12 +328,6 @@ enum dp_stat_type {
 DP_N_STATS
 };
 
-enum pmd_cycles_counter_type {
-PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
-PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
-PMD_N_CYCLES
-};
-
 #define XPS_TIMEOUT_MS 500LL
 
 /* Contained by struct dp_netdev_port's 'rxqs' member.  */
@@ -978,6 +972,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 struct rxq_poll *poll;
 int port_link_status = 0;
 int port_stats = 0;
+int pmd_polling = 0;
+uint64_t cycles[PMD_N_CYCLES];
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -1005,6 +1001,11 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 }
 svec_destroy(_poll_list);
 
+for (int idx = 0; idx < ARRAY_SIZE(cycles); idx++) {
+atomic_read_relaxed(>cycles.n[idx], [idx]);
+}
+pmd_polling = ka_info_update_pmd_cycles(pmd->core_id, cycles);
+
 int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
 switch (pmd_hc_state) {
 case PMD_HC_ENABLE:
@@ -1018,7 +1019,8 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 port_stats = ka_get_polled_ports_stats(pmd->core_id);
 
 if (port_link_status == ACTIVE_RUN_STATE &&
-   port_stats == ACTIVE_RUN_STATE ) {
+  port_stats == ACTIVE_RUN_STATE &&
+pmd_polling == ACTIVE_RUN_STATE) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
 break;
diff --git a/lib/dpif-netdev.h b/lib/dpif-netdev.h
index 6db6ed2..e7c2400 100644
--- a/lib/dpif-netdev.h
+++ b/lib/dpif-netdev.h
@@ -33,6 +33,12 @@ extern "C" {
  * headers to be aligned on a 4-byte boundary.  */
 enum { DP_NETDEV_HEADROOM = 2 + VLAN_HEADER_LEN };
 
+enum pmd_cycles_counter_type {
+PMD_CYCLES_POLLING, /* Cycles spent polling NICs. */
+PMD_CYCLES_PROCESSING,  /* Cycles spent processing packets */
+PMD_N_CYCLES
+};
+
 bool dpif_is_netdev(const struct dpif *);
 
 #define NR_QUEUE   1
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 4234912..3690b70 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -577,6 +577,57 @@ ka_info_update_port_statistics(const struct netdev *netdev,
 state;
 }
 
+int
+ka_info_update_pmd_cycles(int core_id, uint64_t cycles[PMD_N_CYCLES])
+{
+int pmd_state = ACTIVE_RUN_STATE;
+if (!ka_info) {
+return FAILURE_STATE;
+}
+
+uint64_t total_cycles = 0;
+for (int i = 0; i < PMD_N_CYCLES; i++) {
+if (cycles[i] > 0) {
+total_cycles += cycles[i];
+}
+}
+
+if (!total_cycles) {
+return -1;
+}
+
+int pmd_hc_state = ka_get_pmd_health_check_state(core_id);
+if (PMD_HC_ENABLE == pmd_hc_state) {
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_POLLING] =
+   cycles[PMD_CYCLES_POLLING];
+
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING] =
+   cycles[PMD_CYCLES_PROCESSING];
+}
+
+if (PMD_HC_PROGRESS == pmd_hc_state) {
+uint64_t polling_cycles_cnt = 0, proc_cycles_cnt = 0;
+uint64_t prev_poll_cycles =
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_POLLING];
+uint64_t prev_proc_cycles =
+ka_info->ext_stats[core_id].cycles[PMD_CYCLES_PROCESSING];
+
+VLOG_DBG_RL(, "Keepalive: Going to check the PMD thresholds now.");
+
+polling_cycles_cnt = cycles[PMD_CYCLES_POLLING] - prev_poll_cycles;
+
+proc_cycles_cnt = cycles[PMD_CYCLES_PROCESSING]
+   - prev_proc_cycles;
+
+if (!polling_cycles_cnt && !proc_cycles_cnt) {
+VLOG_DBG("PMD FAILURE!");
+pmd_state = FAILURE_STATE;
+}
+}
+
+return pmd_state;
+}
+
 static void
 ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
const char *argv[] OVS_UNUSED, void *ka_info_)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a132d74..1bd639b 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -27,6 +27,7 @@
 #define KA_DP_MAXCORES 128
 #endif /* DPDK_NETDEV */
 
+#include "dpif-netdev.h"
 #include "netdev.h"
 
 struct smap;
@@ -76,6 +77,7 @@ struct poll_port_stats {

[ovs-dev] [RFC PATCH v3 14/18] keepalive: Check the packet statistics as part of PMD health checks.

2017-06-18 Thread Bhanuprakash Bodireddy
This commit adds support for checking the packet statistics on the ports
polled by a PMD thread. If packets aren't processed due to a PMD thread
stall/deadlock, the statistics won't update, and this can be used by the
monitoring framework to confirm PMD failure.

This mechanism has a limitation when MQ is enabled. In some cases the queues
of a DPDK port can be polled by different PMD threads. Even if one PMD
thread stalls, the port statistics will still be incremented by another
queue processed by a different PMD. In this case the function can return
the active state because packets are still being processed.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 25 +++---
 lib/keepalive.c   | 97 +++
 lib/keepalive.h   |  5 +++
 3 files changed, 122 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 4d8d3e7..ad48ee5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -975,8 +975,9 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 static void
 pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 {
-int port_link_status = 0;
 struct rxq_poll *poll;
+int port_link_status = 0;
+int port_stats = 0;
 
 struct svec pmd_poll_list;
 svec_init(_poll_list);
@@ -991,22 +992,36 @@ pmd_health_check(struct dp_netdev_pmd_thread *pmd)
 int i = 0;
 SVEC_FOR_EACH (i, port_name, _poll_list) {
 struct netdev *dev = netdev_from_name(port_name);
+VLOG_DBG("Keepalive: Checking port %s", port_name);
 if (dev) {
 char *link_state = netdev_get_carrier(dev) ? "up" : "down";
 ka_info_update_port_status(port_name, 0, link_state,
 pmd->core_id, i);
+if (!strcmp(link_state, "up")) {
+ka_info_update_port_statistics(dev, pmd->core_id, i);
+}
 netdev_close(dev);
 }
 }
 svec_destroy(_poll_list);
 
-port_link_status = ka_get_polled_ports_status(pmd->core_id);
-
 int pmd_hc_state = ka_get_pmd_health_check_state(pmd->core_id);
-if (PMD_HC_COMPLETE == pmd_hc_state) {
-if (port_link_status == ACTIVE_RUN_STATE) {
+switch (pmd_hc_state) {
+case PMD_HC_ENABLE:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_PROGRESS);
+break;
+case PMD_HC_PROGRESS:
+ka_set_pmd_health_check_state(pmd->core_id, PMD_HC_COMPLETE);
+break;
+case PMD_HC_COMPLETE:
+port_link_status = ka_get_polled_ports_status(pmd->core_id);
+port_stats = ka_get_polled_ports_stats(pmd->core_id);
+
+if (port_link_status == ACTIVE_RUN_STATE &&
+   port_stats == ACTIVE_RUN_STATE ) {
 ka_set_pmd_state_ts(pmd->core_id, KA_STATE_ALIVE, 0);
 }
+break;
 }
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 9251849..4234912 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -24,6 +24,7 @@
 #include "dpdk.h"
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "netdev-dpdk.h"
 #include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
@@ -31,6 +32,7 @@
 #include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
 
 static bool keepalive_enable = false;/* Keepalive disabled by default */
 static bool ka_init_status = ka_init_failure; /* Keepalive initialization */
@@ -453,6 +455,31 @@ enum pmdhealth_status ka_get_polled_ports_status(unsigned 
core_id)
 }
 }
 
+enum pmdhealth_status ka_get_polled_ports_stats(unsigned core_id)
+{
+if (!ka_info) {
+return FAILURE_STATE;
+}
+
+int failed = 0;
+int n_ports = ka_info->ext_stats[core_id].num_poll_ports;
+for (int i = 0; i < n_ports; i++) {
+int state;
+state =
+  ka_info->ext_stats[core_id].port_stats[i].state[PORT_STATS_CHECK];
+if (state == FAILURE_STATE) {
+failed = 1;
+break;
+}
+}
+
+if (!failed) {
+return ACTIVE_RUN_STATE;
+} else {
+return FAILURE_STATE;
+}
+}
+
 void
 ka_info_update_port_status(const char *port, int qid OVS_UNUSED,
char *link_state, int core_id, int idx)
@@ -480,6 +507,76 @@ ka_info_update_port_status(const char *port, int qid 
OVS_UNUSED,
state;
 }
 
+void
+ka_info_update_port_statistics(const struct netdev *netdev,
+  int core_id, int idx)
+{
+int error;
+int state = FAILURE_STATE;
+
+if (!ka_info) {
+VLOG_ERR_RL(, "Keepalive disabled");
+return;
+}
+ka_info->ext_stats[core_id].num_poll_ports = idx;
+
+

[ovs-dev] [PATCH 1/2] checkpatch: Suggest ovs_assert() to author.

2017-06-18 Thread Bhanuprakash Bodireddy
Suggest that the author use the OVS wrapper for the assert function.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 utilities/checkpatch.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/utilities/checkpatch.py b/utilities/checkpatch.py
index b45a255..304d2fd 100755
--- a/utilities/checkpatch.py
+++ b/utilities/checkpatch.py
@@ -256,6 +256,7 @@ std_functions = [
 ('strerror', 'Use ovs_strerror() in place of strerror()'),
 ('sleep', 'Use xsleep() in place of sleep()'),
 ('abort', 'Use ovs_abort() in place of abort()'),
+('assert', 'Use ovs_assert() in place of assert()'),
 ('error', 'Use ovs_error() in place of error()'),
 ]
 checks += [
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 10/10] NEWS: Add keepalive support information in NEWS.

2017-09-15 Thread Bhanuprakash Bodireddy
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 NEWS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS b/NEWS
index 6a5d2bf..7f5ff78 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ Post-v2.8.0
- OVN:
  * The "requested-chassis" option for a logical switch port now accepts a
chassis "hostname" in addition to a chassis "name".
+   - Userspace Datapath:
+ * Added Keepalive support for userspace datapath.
 
 v2.8.0 - xx xxx 
 -
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 04/10] dpif-netdev: Register packet processing cores to KA framework.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD threads with the keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by the KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD threads. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  79 ++
 lib/keepalive.c   | 191 --
 lib/keepalive.h   |  20 ++
 lib/ovs-thread.c  |   6 ++
 lib/ovs-thread.h  |   1 +
 lib/util.c|  22 +++
 lib/util.h|   1 +
 7 files changed, 316 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..da419d5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -49,6 +49,7 @@
 #include "flow.h"
 #include "hmapx.h"
 #include "id-pool.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -591,6 +592,7 @@ struct dp_netdev_pmd_thread {
 uint64_t last_reload_seq;
 atomic_bool reload; /* Do we need to reload ports? */
 pthread_t thread;
+pid_t tid;  /* Thread id of this pmd thread. */
 unsigned core_id;   /* CPU core id of this pmd thread. */
 int numa_id;/* numa node id of this pmd thread. */
 bool isolated;
@@ -1018,6 +1020,72 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+int interval;
+
+interval = get_ka_interval();
+xnanosleep(interval);
+}
+
+return NULL;
+}
+
+/* Kickstart 'ovs_keepalive' thread. */
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+/* Register the datapath threads. This gets invoked on every datapath
+ * reconfiguration. The pmd thread[s] having rxq[s] mapped will be
+ * registered to KA framework.
+ */
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+if (!ka_is_enabled()) {
+return;
+}
+
+ka_thread_start(dp);
+
+ka_reload_datapath_threads_begin();
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+/* Rxq mapping changes due to datapath reconfiguration.
+ * If no rxqs mapped to PMD now due to reconfiguration,
+ * unregister the pmd thread. */
+ka_unregister_thread(pmd->tid);
+continue;
+}
+
+ka_register_thread(pmd->tid);
+VLOG_INFO("Registered PMD thread [%d] on Core[%d] to KA framework",
+  pmd->tid, pmd->core_id);
+}
+}
+ka_cache_registered_threads();
+
+ka_reload_datapath_threads_end();
+}
+
 static void
 dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
   const char *argv[], void *aux OVS_UNUSED)
@@ -3821,6 +3889,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -4023,6 +4094,8 @@ pmd_thread_main(void *f_)
 
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
+/* Stores tid in to 'pmd->tid'. */
+ovsthread_settid(>tid);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
 dpdk_set_lcore_id(pmd->core_id);
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
@@ -4056,6 +4129,9 @@ reload:
   : PMD_CYCLES_IDLE);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive(pmd->tid);
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -4089,6 +4165,9 @@ reload:
 }
 
 emc_cache_uninit(>flow_cache);
+
+ka_unregister_thread(pmd->tid);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 1f151f6..da4defd 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "

[ovs-dev] [PATCH v5 05/10] dpif-netdev: Enable heartbeats for DPDK datapath.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals (as set in ovsdb
with 'keepalive-interval').

The heartbeats are only enabled when there is at least one port added to
the bridge and an active PMD thread polling the port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +--
 lib/keepalive.c   | 44 
 lib/keepalive.h   |  1 +
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index da419d5..fd0ce61 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1021,14 +1021,25 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void *
-ovs_keepalive(void *f_ OVS_UNUSED)
+ovs_keepalive(void *f_)
 {
+struct dp_netdev *dp = f_;
+
 pthread_detach(pthread_self());
 
 for (;;) {
-int interval;
+int interval, n_pmds;
+bool hb_enable;
 
 interval = get_ka_interval();
+n_pmds = cmap_count(>poll_threads) - 1;
+hb_enable = (n_pmds > 0) ? true : false;
+
+/* Dispatch heartbeats only if pmd[s] exist. */
+if (hb_enable) {
+dispatch_heartbeats();
+}
+
 xnanosleep(interval);
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index da4defd..3067e73 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -284,6 +284,50 @@ ka_mark_pmd_thread_sleep(int tid)
 }
 }
 
+/* Dispatch heartbeats from 'ovs_keepalive' thread. */
+void
+dispatch_heartbeats(void)
+{
+struct ka_process_info *pinfo, *pinfo_next;
+
+/* Iterates over the list of processes in 'cached_process_list' map. */
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node,
+_info.cached_process_list) {
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_UNUSED:
+break;
+case KA_STATE_ALIVE:
+pinfo->state = KA_STATE_MISSING;
+pinfo->last_seen_time = time_wall_msec();
+break;
+case KA_STATE_MISSING:
+pinfo->state = KA_STATE_DEAD;
+break;
+case KA_STATE_DEAD:
+pinfo->state = KA_STATE_GONE;
+break;
+case KA_STATE_GONE:
+break;
+case KA_STATE_DOZING:
+pinfo->state = KA_STATE_SLEEP;
+pinfo->last_seen_time = time_wall_msec();
+break;
+case KA_STATE_SLEEP:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+/* Invoke 'ka_update_thread_state' cb function to update state info
+ * in to 'ka_info.process_list' map. */
+ka_info.relay_cb(pinfo->tid, pinfo->state, pinfo->last_seen_time);
+}
+}
+
 void
 ka_init(const struct smap *ovs_other_config)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 9e8bfdf..392a701 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ void ka_free_cached_threads(void);
 void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
+void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 01/10] process: Extend get_process_info() for additional fields.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit enables the fields relating to the process name and the core
number on which the process was last scheduled. The fields will be used by
the keepalive monitoring framework in future commits.

This commit also fixes the following "sparse" warning:

  lib/process.c:439:16: error: use of assignment suppression and length
  modifier together in gnu_scanf format [-Werror=format=].

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 43 +++
 lib/process.h |  2 ++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index 3e119b5..95df112 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -64,7 +64,8 @@ struct raw_process_info {
 long long int uptime;   /* ms since started. */
 long long int cputime;  /* ms of CPU used during 'uptime'. */
 pid_t ppid; /* Parent. */
-char name[18];  /* Name (surrounded by parentheses). */
+int core_id;/* Core id last executed on. */
+char name[18];  /* Name. */
 };
 
 /* Pipe used to signal child termination. */
@@ -421,7 +422,7 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
 
 n = fscanf(stream,
"%*d "   /* (1. pid) */
-   "%17s "  /* 2. process name */
+   "(%17[^)]) " /* 2. process name */
"%*c "   /* (3. state) */
"%lu "   /* 4. ppid */
"%*d "   /* (5. pgid) */
@@ -444,33 +445,34 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
"%llu "  /* 22. start_time */
"%llu "  /* 23. vsize */
"%llu "  /* 24. rss */
+   "%*u "   /* (25. rsslim) */
+   "%*u "   /* (26. start_code) */
+   "%*u "   /* (27. end_code) */
+   "%*u "   /* (28. start_stack) */
+   "%*u "   /* (29. esp) */
+   "%*u "   /* (30. eip) */
+   "%*u "   /* (31. pending signals) */
+   "%*u "   /* (32. blocked signals) */
+   "%*u "   /* (33. ignored signals) */
+   "%*u "   /* (34. caught signals) */
+   "%*u "   /* (35. whcan) */
+   "%*u "   /* (36. always 0) */
+   "%*u "   /* (37. always 0) */
+   "%*d "   /* (38. exit_signal) */
+   "%d "/* 39. task_cpu */
 #if 0
/* These are here for documentation but #if'd out to save
 * actually parsing them from the stream for no benefit. */
-   "%*lu "  /* (25. rsslim) */
-   "%*lu "  /* (26. start_code) */
-   "%*lu "  /* (27. end_code) */
-   "%*lu "  /* (28. start_stack) */
-   "%*lu "  /* (29. esp) */
-   "%*lu "  /* (30. eip) */
-   "%*lu "  /* (31. pending signals) */
-   "%*lu "  /* (32. blocked signals) */
-   "%*lu "  /* (33. ignored signals) */
-   "%*lu "  /* (34. caught signals) */
-   "%*lu "  /* (35. whcan) */
-   "%*lu "  /* (36. always 0) */
-   "%*lu "  /* (37. always 0) */
-   "%*d "   /* (38. exit_signal) */
-   "%*d "   /* (39. task_cpu) */
"%*u "   /* (40. rt_priority) */
"%*u "   /* (41. policy) */
"%*llu " /* (42. blkio_ticks) */
"%*lu "  /* (43. gtime) */
"%*ld"   /* (44. cgtime) */
 #endif
-   , raw->name, , , , _time, , );
+   , raw->name, , , , _time,
+  , , >core_id);
 fclose(stream);
-if (n != 7) {
+if (n != 8) {
 VLOG_ERR_ONCE("%s: fscanf failed", file_name);
 return false;
 }
@@ -496,12 +498,14 @@ get_process_info(pid_t pid, struct process_info *pinfo)
 return false;
 }
 
+ovs_strlcpy(pinfo->name, child.name, sizeof pinfo->name);
 pinfo->vsz = child.vsz;
 pinfo->rss = child.rss;
 pinfo->booted = child.uptime;
 pinfo->crashes = 0;
 pinfo->uptime = child.uptime;
   

[ovs-dev] [PATCH v5 00/10] Add OVS DPDK keep-alive functionality.

2017-09-15 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing threads by dispatching heartbeats at regular intervals.
 
The keepalive feature can be enabled through the OVSDB settings below.

enable-keepalive=true
  - Keepalive feature is disabled by default and should be enabled
at startup before ovs-vswitchd daemon is started.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

v4 -> v5
  * Add 3 more patches to the series
 - xnanosleep()
 - Documentation
 - Update to NEWS
  * Remove all references to core_id and instead implement thread-based
tracking.
  * Addressed most of the comments in v4.

v3 -> v4
  * Split the functionality into 2 parts. This patch series only updates
PMD status to OVSDB. An incremental patch series will handle false positives,
false negatives, and additional checking and stats.
  * Remove code from netdev layer and dependency on rte_keepalive lib.
  * Merged few patches and simplified the patch series.
  * Timestamp in human readable form.

v2 -> v3
  * Rebase.
  * Verified with dpdk-stable-17.05.1 release.
  * Fixed build issues with MSVC and cross checked with appveyor.

v1 -> v2
  * Rebase
  * Drop 01/20 Patch "Consolidate process related APIs" of V1 as it
is already applied as separate patch.

RFCv3 -> v1
  * Made changes to fix failures in some unit test cases.
  * some more code cleanup w.r.t process related APIs.

RFCv2 -> RFCv3
  * Remove POSIX shared memory block implementation (suggested by Aaron).
  * Rework the logic to register and track threads instead of cores. This way
in the future any thread can be registered to KA framework. For now only PMD
threads are tracked (suggested by Aaron).
  * Refactor few APIs and further clean up the code.
   
RFCv1 -> RFCv2
  * Merged the xml and schema commits to later commit where the actual
implementation is done(suggested by Ben).
  * Fix ovs-appctl keepalive/* hang issue when KA disabled.
  * Fixed memory leaks with appctl commands for keepalive/pmd-health-show,
pmd-xstats-show.
  * Refactor code and fixed APIs dealing with PMD health monitoring.


Bhanuprakash Bodireddy (10):
  process: Extend get_process_info() for additional fields.
  Keepalive: Add initial keepalive support.
  util: Add high resolution sleep support.
  dpif-netdev: Register packet processing cores to KA framework.
  dpif-netdev: Enable heartbeats for DPDK datapath.
  keepalive: Retrieve PMD status periodically.
  bridge: Update keepalive status in OVSDB.
  keepalive: Add support to query keepalive status and statistics.
  Documentation: Update DPDK doc with Keepalive feature.
  NEWS: Add keepalive support information in NEWS.

 Documentation/howto/dpdk.rst | 113 +
 NEWS |   2 +
 lib/automake.mk  |   2 +
 lib/dpif-netdev.c|  91 +++
 lib/keepalive.c  | 556 +++
 lib/keepalive.h  | 111 +
 lib/ovs-thread.c |   6 +
 lib/ovs-thread.h |   1 +
 lib/process.c|  43 ++--
 lib/process.h|   2 +
 lib/timeval.c|   2 +-
 lib/timeval.h|   1 +
 lib/util.c   |  41 
 lib/util.h   |   2 +
 vswitchd/bridge.c|  29 +++
 vswitchd/vswitch.ovsschema   |   8 +-
 vswitchd/vswitch.xml |  49 
 17 files changed, 1036 insertions(+), 23 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 07/10] bridge: Update keepalive status in OVSDB.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

rowaction keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  {
"pmd62"="ALIVE,0,9226457935188922"
"pmd63"="ALIVE,1,150678618"
"pmd64"="ALIVE,2,150678618"
"pmd65"="ALIVE,3,150678618"}

 new  {
"pmd62"="ALIVE,0,9226460230167364"
"pmd63"="ALIVE,1,150679619"
"pmd64"="ALIVE,2,150679619"
"pmd65"="ALIVE,3,150679619"}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index fe81919..b140d21 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -350,6 +350,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 6e6ec68..1f76c34 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -103,6 +103,7 @@ void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index dd9a009..82649c9 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2686,6 +2688,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3039,6 +3064,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 06/10] keepalive: Retrieve PMD status periodically.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: pmdid="status,core id,last_seen_timestamp(epoch)"
  eg: pmd62="ALIVE,2,150332575"
  pmd63="GONE,3,150332525"

The status is periodically retrieved by the keepalive thread and stored in
the keepalive_stats struct, which is later retrieved by the vswitchd thread.
In the case of four PMD threads the status is as below:

   "pmd62"="ALIVE,0,150332575"
   "pmd63"="ALIVE,1,150332575"
   "pmd64"="ALIVE,2,150332575"
   "pmd65"="ALIVE,3,150332575"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 66 +++
 lib/keepalive.h   |  1 +
 3 files changed, 68 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index fd0ce61..56a3422 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1038,6 +1038,7 @@ ovs_keepalive(void *f_)
 /* Dispatch heartbeats only if pmd[s] exist. */
 if (hb_enable) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 xnanosleep(interval);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3067e73..fe81919 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
@@ -29,6 +30,9 @@ static bool keepalive_enable = false;  /* Keepalive 
disabled by default. */
 static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
 static struct keepalive_info ka_info;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 /* Returns true if state update is allowed, false otherwise. */
 static bool
 ka_can_update_state(void)
@@ -284,6 +288,68 @@ ka_mark_pmd_thread_sleep(int tid)
 }
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info.proclist_mutex)
+{
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info.process_list) {
+char *state = NULL;
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, pinfo->core_id, pinfo->last_seen_time);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info.proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info.proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 392a701..6e6ec68 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ void ka_free_cached_threads(void);
 void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
+void get_ka_stats(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 02/10] Keepalive: Add initial keepalive support.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

This commit adds new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented for DPDK datapath and
only status of PMD threads is reported.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/automake.mk|   2 +
 lib/keepalive.c| 145 +
 lib/keepalive.h|  88 +++
 vswitchd/bridge.c  |   3 +
 vswitchd/vswitch.ovsschema |   8 ++-
 vswitchd/vswitch.xml   |  49 +++
 6 files changed, 293 insertions(+), 2 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 2415f4c..0d99f0a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..1f151f6
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2017 Intel, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "seq.h"
+#include "timeval.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;  /* Keepalive disabled by default. */
+static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
+static struct keepalive_info ka_info;
+
+/* Returns true if keepalive is enabled, false otherwise. */
+bool
+ka_is_enabled(void)
+{
+return keepalive_enable;
+}
+
+/* Finds the thread by 'tid' in 'process_list' map and update
+ * the thread state and last_seen_time stamp.  This is invoked
+ * periodically(based on keepalive-interval) as part of callback
+ * function in the context of keepalive thread.
+ */
+static void
+ka_set_thread_state_ts(pid_t tid, enum keepalive_state state,
+   uint64_t last_alive)
+{
+struct ka_process_info *pinfo;
+
+ovs_mutex_lock(_info.proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info.process_list) {
+if (pinfo->tid == tid) {
+pinfo->state = state;
+pinfo->last_seen_time = last_alive;
+}
+}
+ovs_mutex_unlock(_info.proclist_mutex);
+}
+
+/* Retrieve and return the keepalive timer interval from OVSDB. */
+static uint32_t
+ka_get_timer_interval(const struct smap *ovs_other_config)
+{
+uint32_t ka_interval;
+
+/* Timer granularity in milliseconds
+ * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */
+ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
+   OVS_KEEPALIVE_DEFAULT_TIMEOUT);
+
+VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
+return ka_interval;
+}
+
+/*
+ * This function is invoked periodically to write the status and
+ * last seen timestamp of the thread in to 'process_list' map.
+ */
+static void
+ka_update_thread_state(pid_t tid, const enum keepalive_state state,
+   uint64_t last_alive)
+{
+switch (state) {
+case KA_STATE_ALIVE:
+case KA_STATE_MISSING:
+ka_set_thread_state_ts(tid, KA_STATE_ALIVE, last_alive);
+break;
+case KA_STATE_UNUSED:
+case KA_STATE_DOZING:
+case KA_STATE_SLEEP:
+case KA_STATE_DEAD:
+case KA_STATE_GONE:
+ka_set_thread_state_ts(tid, state, last_alive);
+break;
+default:
+OVS_NOT_REACHED();
+}
+}
+
+/* Register relay callback function. */
+static void
+keepalive_register_relay_cb(ka_relay_cb cb, void *aux)
+{
+ka_info.relay_cb = cb;
+ka_info.relay_cb_data = aux;
+}
+
+void
+ka_init(const struct smap *ovs_other_config)
+{
+if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) {
+static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start(_enab

[ovs-dev] [PATCH v5 03/10] util: Add high resolution sleep support.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit introduces xnanosleep() for the threads needing high
resolution sleep timeouts.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c |  2 +-
 lib/timeval.h |  1 +
 lib/util.c| 19 +++
 lib/util.h|  1 +
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index dd63f03..8190b41 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -474,7 +474,7 @@ xclock_gettime(clock_t id, struct timespec *ts)
 }
 }
 
-static void
+void
 msec_to_timespec(long long int ms, struct timespec *ts)
 {
 ts->tv_sec = ms / 1000;
diff --git a/lib/timeval.h b/lib/timeval.h
index 7957dad..110745c 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -69,6 +69,7 @@ size_t strftime_msec(char *s, size_t max, const char *format,
  const struct tm_msec *);
 void xgettimeofday(struct timeval *);
 void xclock_gettime(clock_t, struct timespec *);
+void msec_to_timespec(long long int , struct timespec *);
 
 int get_cpu_usage(void);
 
diff --git a/lib/util.c b/lib/util.c
index 36e3731..4ad7eea 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2197,6 +2197,25 @@ xsleep(unsigned int seconds)
 ovsrcu_quiesce_end();
 }
 
+/* High resolution sleep. */
+void
+xnanosleep(uint64_t ms)
+{
+ovsrcu_quiesce_start();
+#ifdef __linux__
+int retval;
+struct timespec ts_sleep;
+msec_to_timespec(ms, _sleep);
+
+int error = 0;
+do {
+retval = nanosleep(_sleep, NULL);
+error = retval < 0 ? errno : 0;
+} while (error == EINTR);
+#endif
+ovsrcu_quiesce_end();
+}
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
  * to `auto`.
diff --git a/lib/util.h b/lib/util.h
index 764e0a0..0449fa1 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -489,6 +489,7 @@ ovs_u128_and(const ovs_u128 a, const ovs_u128 b)
 }
 
 void xsleep(unsigned int seconds);
+void xnanosleep(uint64_t ms);
 
 bool is_stdout_a_tty(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 08/10] keepalive: Add support to query keepalive status and statistics.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive status and statistics.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status   : Enabled
keepalive interval : 1000 ms
keepalive init time: 21 Aug 2017 16:20:31
PMD threads: 4

 PMD     CORE    STATE   LAST SEEN TIMESTAMP(UTC)
pmd62     0      ALIVE   21 Aug 2017 16:29:31
pmd63     1      ALIVE   21 Aug 2017 16:29:31
pmd64     2      ALIVE   21 Aug 2017 16:29:31
pmd65     3      GONE    21 Aug 2017 16:26:31

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 103 
 1 file changed, 103 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index b140d21..9db1389 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -18,11 +18,13 @@
 
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -365,6 +367,101 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status   : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval : %"PRIu32" ms\n",
+  get_ka_interval());
+
+char *utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+ka_info.init_time, true);
+ds_put_format(, "keepalive init time: %s \n", utc);
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads: %"PRIu32" \n", 
ka_info->thread_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP(UTC)\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+pinfo->last_seen_time, true);
+
+ds_put_format(, "%s\t%2d\t%s\t%s\n",
+  pinfo->name, pinfo->core_id, state, utc);
+
+free(utc);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
@@ -429,6 +526,12 @@ ka_init(const struct smap *ovs_other_config)
 
 ka_info.init_time = time_wall_msec();
 
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
+
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, _info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 09/10] Documentation: Update DPDK doc with Keepalive feature.

2017-09-15 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing threads by dispatching heartbeats at regular intervals.

The implementation uses OvSDB for reporting the health of the PMD threads.
Any external monitoring application can query the OvSDB for status
at regular intervals (or) subscribe to OvSDB updates.

keepalive feature can be enabled through below OVSDB settings.

enable-keepalive=true
  - Keepalive feature is disabled by default and should be enabled
at startup before ovs-vswitchd daemon is started.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, 'ovs-keepalive' thread shall be spawned that wakes
up at regular intervals to update the timestamp and status of pmd
threads in process map. This information shall be read by vswitchd thread
and written in to 'keepalive' column of Open_vSwitch table in OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, the collectd shall be notified and will eventually relay the status
to ceilometer service running in the controller. Below is the high level
overview of deployment model.

Compute NodeControllerCompute Node

Collectd  <--> Ceilometer <>   Collectd

OvS DPDK   OvS DPDK

+-+
| VM  |
+--+--+
   \---+---/
   |
+--+---+   ++--+ +--+---+
| OVS  |-> |   ovsevents plugin| --> |   collectd   |
+--+---+   ++--+ +--+---+

+--+-+ +---++ |
| Ceilometer | <-- | collectd ceilometer plugin |  <---
+--+-+ +---++

Performance impact
------------------
No noticeable performance or latency impact is observed with
KA feature enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/howto/dpdk.rst | 113 +++
 1 file changed, 113 insertions(+)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d123819..6fc1316 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -439,6 +439,119 @@ For certain traffic profiles with many parallel flows, 
it's recommended to set
 
 For more information on the EMC refer to :doc:`/intro/install/dpdk` .
 
+.. _dpdk_keepalive:
+
+Keepalive
+-
+
+OvS Keepalive(KA) feature is disabled by default. To enable KA feature::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true
+
+The KA feature can't be enabled at run time and should be done at startup
+before ovs-vswitchd daemon is started.
+
+The default timer interval for monitoring packet processing threads is 1000ms.
+To set a different timer value, run::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . \
+other_config:keepalive-interval="5000"
+
+The events comprise thread states and the last seen timestamps. The events
+are written into the process map periodically by the keepalive thread.
+
+The events in the process map are retrieved by main(vswitchd) thread and
+updated in to keepalive column of Open_vSwitch table in OVSDB. Any external
+monitoring application can read the status from OVSDB at intervals or subscribe
+to the updates so that they get notified when the changes happen on OvSDB.
+
+To monitor the datapath status using ovsdb-client, run::
+
+$ ovsdb-client monitor Open_vSwitch
+$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive
+
+The datapath thread states are explained below::
+
+  KA_STATE_UNUSED  - Not registered to KA framework.
+  KA_STATE_ALIVE   - Thread alive.
+  KA_STATE_MISSING - Thread missed first heartbeat.
+  KA_STATE_DEAD    - Thread missed two heartbeats.
+  KA_STATE_GONE    - Thread missed two or more heartbeats and is buried.
+  KA_STATE_DOZING  - Thread is idle.
+  KA_STATE_SLEEP   - Thread is sleeping.
+
+To query the datapath status, run::
+
+$ ovs-appctl keepalive/pmd-health-show
+
+`collectd <https://collectd.org/>`__ has built-in support for DPDK and provides
+a `ovs_events` and `ovs_stats` plugin that can be enabled to relay the datapath
+status and the PMD status to OpenStack service `Ceilometer
+<https://docs.openstack.org/developer/ceilometer/>`__.
+
+To install and configure `collectd`, run::
+
+# Clone collectd from Git repository
+$ git clone https://github.com/collectd/collectd.git
+
+# configure and install collectd
+$ cd collectd
+$ ./build.sh
+$ ./configure --enable-syslog --enable-logfile --

[ovs-dev] [PATCH 07/10] odp-execute: Use const qualifer for batch size.

2017-09-19 Thread Bhanuprakash Bodireddy
It is recommended to use the const qualifier for 'num', which tracks the
packet batch count. This way 'num' can't be modified by the iterator.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/odp-execute.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5f4d23a..3109f39 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -826,7 +826,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 break;
 }
 case OVS_ACTION_ATTR_DECAP_NSH: {
-size_t i, num = batch->count;
+size_t i;
+const size_t num = dp_packet_batch_size(batch);
 
 DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
 if (decap_nsh(packet)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 06/10] netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_bsd_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-bsd.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..96ba71c 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -685,6 +685,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
 struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
 const char *name = netdev_get_name(netdev_);
+struct dp_packet *packet;
 int error;
 int i;
 
@@ -695,9 +696,9 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 error = 0;
 }
 
-for (i = 0; i < batch->count; i++) {
-const void *data = dp_packet_data(batch->packets[i]);
-size_t size = dp_packet_get_send_len(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+const void *data = dp_packet_data(packet);
+size_t size = dp_packet_get_send_len(packet);
 
 while (!error) {
 ssize_t retval;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 10/10] dpif-netdev: Remove 'cnt' in dp_netdev_input__().

2017-09-19 Thread Bhanuprakash Bodireddy
There is little use of 'cnt' variable in dp_netdev_input__(). Get rid of
it and use dp_packet_batch_size() to initialize PKT_ARRAY_SIZE.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07fca44..35b7a64 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5100,9 +5100,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets,
   bool md_is_valid, odp_port_t port_no)
 {
-int cnt = packets->count;
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = cnt;
+const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 02/10] netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in netdev_linux_tap_batch_send.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_linux_tap_batch_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index a1d9e2f..440598b 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1233,8 +1233,8 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
 struct dp_packet_batch *batch)
 {
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 size_t size = dp_packet_get_send_len(packet);
 ssize_t retval;
 int error;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 01/10] netdev-linux: Clean up netdev_linux_sock_batch_send().

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro and dp_packet_batch_size() API
in netdev_linux_sock_batch_send(). No change in functionality.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2b..a1d9e2f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1186,16 +1186,17 @@ static int
 netdev_linux_sock_batch_send(int sock, int ifindex,
  struct dp_packet_batch *batch)
 {
+const size_t size = dp_packet_batch_size(batch);
 /* We don't bother setting most fields in sockaddr_ll because the
  * kernel ignores them for SOCK_RAW. */
 struct sockaddr_ll sll = { .sll_family = AF_PACKET,
.sll_ifindex = ifindex };
 
-struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * batch->count);
-struct iovec *iov = xmalloc(sizeof(*iov) * batch->count);
+struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
+struct iovec *iov = xmalloc(sizeof(*iov) * size);
 
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 iov[i].iov_base = dp_packet_data(packet);
 iov[i].iov_len = dp_packet_get_send_len(packet);
 mmsg[i].msg_hdr = (struct msghdr) { .msg_name = ,
@@ -1205,10 +1206,10 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
 }
 
 int error = 0;
-for (uint32_t ofs = 0; ofs < batch->count; ) {
+for (uint32_t ofs = 0; ofs < size; ) {
 ssize_t retval;
 do {
-retval = sendmmsg(sock, mmsg + ofs, batch->count - ofs, 0);
+retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
 error = retval < 0 ? errno : 0;
 } while (error == EINTR);
 if (error) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 00/10] Use DP_PACKET_BATCH_FOR_EACH macro.

2017-09-19 Thread Bhanuprakash Bodireddy
DP_PACKET_BATCH_FOR_EACH macro was introduced early this year as part
of enhancing packet batch APIs. Commit '72c84bc2' implemented this macro
and replaced most of the calling sites with macros and simplified the logic.

However, there are still many APIs that need to be fixed.
This patch series is a simple and straightforward set of changes
aimed at using DP_PACKET_BATCH_FOR_EACH macro at all appropriate places.
Also minor code cleanup is done to improve readability of the code.

No functionality changes and no performance impact with this series.

Bhanuprakash Bodireddy (10):
  netdev-linux: Clean up netdev_linux_sock_batch_send().
  netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in
netdev_linux_tap_batch_send.
  netdev-dpdk: Cleanup dpdk_do_tx_copy.
  netdev-dpdk: Minor cleanup of netdev_dpdk_send__.
  netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send
  netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.
  odp-execute: Use const qualifer for batch size.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.
  dpif-netdev: Remove 'cnt' in dp_netdev_input__().

 lib/dpif-netdev.c  | 33 +++--
 lib/netdev-bsd.c   |  7 ---
 lib/netdev-dpdk.c  | 40 +++-
 lib/netdev-linux.c | 17 +
 lib/odp-execute.c  |  3 ++-
 5 files changed, 49 insertions(+), 51 deletions(-)

-- 
2.4.11
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 05/10] netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_dpdk_ring_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4e75bf1..3ff79c1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2940,14 +2940,14 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
   bool concurrent_txq)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-unsigned i;
+struct dp_packet *packet;
 
 /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that
  * the rss hash field is clear. This is because the same mbuf may be
  * modified by the consumer of the ring and return into the datapath
  * without recalculating the RSS hash. */
-for (i = 0; i < batch->count; i++) {
-dp_packet_mbuf_rss_flag_reset(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_mbuf_rss_flag_reset(packet);
 }
 
 netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 09/10] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in fast_path_processing().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5000f7a..07fca44 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5001,14 +5001,14 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
  odp_port_t in_port,
  long long now)
 {
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 #if !defined(__CHECKER__) && !defined(_WIN32)
 const size_t PKT_ARRAY_SIZE = cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
-struct dp_packet **packets = packets_->packets;
+struct dp_packet *packet;
 struct dpcls *cls;
 struct dpcls_rule *rules[PKT_ARRAY_SIZE];
 struct dp_netdev *dp = pmd->dp;
@@ -5036,7 +5036,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 ofpbuf_use_stub(, actions_stub, sizeof actions_stub);
 ofpbuf_use_stub(_actions, slow_stub, sizeof slow_stub);
 
-for (i = 0; i < cnt; i++) {
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 struct dp_netdev_flow *netdev_flow;
 
 if (OVS_LIKELY(rules[i])) {
@@ -5055,7 +5055,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 }
 
 miss_cnt++;
-handle_packet_upcall(pmd, packets[i], [i], ,
+handle_packet_upcall(pmd, packet, [i], ,
  _actions, _cnt, now);
 }
 
@@ -5063,17 +5063,16 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 ofpbuf_uninit(_actions);
 fat_rwlock_unlock(>upcall_rwlock);
 } else if (OVS_UNLIKELY(any_miss)) {
-for (i = 0; i < cnt; i++) {
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 if (OVS_UNLIKELY(!rules[i])) {
-dp_packet_delete(packets[i]);
+dp_packet_delete(packet);
 lost_cnt++;
 miss_cnt++;
 }
 }
 }
 
-for (i = 0; i < cnt; i++) {
-struct dp_packet *packet = packets[i];
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 struct dp_netdev_flow *flow;
 
 if (OVS_UNLIKELY(!rules[i])) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 08/10] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in dp_netdev_run_meter().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..5000f7a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4121,10 +4121,11 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 {
 struct dp_meter *meter;
 struct dp_meter_band *band;
+struct dp_packet *packet;
 long long int long_delta_t; /* msec */
 uint32_t delta_t; /* msec */
 int i;
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 uint32_t bytes, volume;
 int exceeded_band[NETDEV_MAX_BURST];
 uint32_t exceeded_rate[NETDEV_MAX_BURST];
@@ -4157,8 +4158,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 meter->used = now;
 meter->packet_count += cnt;
 bytes = 0;
-for (i = 0; i < cnt; i++) {
-bytes += dp_packet_size(packets_->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+bytes += dp_packet_size(packet);
 }
 meter->byte_count += bytes;
 
@@ -4208,8 +4209,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 } else {
 /* Packet sizes differ, must process one-by-one. */
 band_exceeded_pkt = cnt;
-for (i = 0; i < cnt; i++) {
-uint32_t bits = dp_packet_size(packets_->packets[i]) * 8;
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+uint32_t bits = dp_packet_size(packet) * 8;
 
 if (band->bucket >= bits) {
 band->bucket -= bits;
@@ -4237,10 +4238,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 /* Fire the highest rate band exceeded by each packet.
  * Drop packets if needed, by swapping packet to the end that will be
  * ignored. */
-const size_t size = dp_packet_batch_size(packets_);
-struct dp_packet *packet;
 size_t j;
-DP_PACKET_BATCH_REFILL_FOR_EACH (j, size, packet, packets_) {
+DP_PACKET_BATCH_REFILL_FOR_EACH (j, cnt, packet, packets_) {
 if (exceeded_band[j] >= 0) {
 /* Meter drop packet. */
 band = >bands[exceeded_band[j]];
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 04/10] netdev-dpdk: Minor cleanup of netdev_dpdk_send__.

2017-09-19 Thread Bhanuprakash Bodireddy
The variable 'cnt' is initialized and reused in multiple function calls
inside netdev_dpdk_send__() and is confusing sometimes. Instead introduce
'batch_cnt' to hold the original packet count and 'tx_cnt' to store
the final packet count resulting after filtering and qos operations.

Finally, 'tx_cnt' packets get transmitted on the respective 'qid'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2078c94..4e75bf1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1938,17 +1938,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 dpdk_do_tx_copy(netdev, qid, batch);
 dp_packet_delete_batch(batch, may_steal);
 } else {
-int dropped;
-int cnt = batch->count;
+int tx_cnt, dropped;
+int batch_cnt = dp_packet_batch_size(batch);
 struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
 dp_packet_batch_apply_cutlen(batch);
 
-cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
-dropped = batch->count - cnt;
+tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
+tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
+dropped = batch_cnt - tx_cnt;
 
-dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
 
 if (OVS_UNLIKELY(dropped)) {
 rte_spinlock_lock(&dev->stats_lock);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 03/10] netdev-dpdk: Cleanup dpdk_do_tx_copy.

2017-09-19 Thread Bhanuprakash Bodireddy
Clean up dpdk_do_tx_copy()

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 648d719..2078c94 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1835,22 +1835,23 @@ static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
+const size_t batch_cnt = dp_packet_batch_size(batch);
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = batch->count;
+const size_t PKT_ARRAY_SIZE = batch_cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-uint32_t cnt = batch->count;
+uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
 cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-  cnt, false);
-dropped += batch->count - cnt;
+  batch_cnt, false);
+dropped += batch_cnt - cnt;
 }
 
 dp_packet_batch_apply_cutlen(batch);
@@ -1858,8 +1859,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 uint32_t txcnt = 0;
 
 for (uint32_t i = 0; i < cnt; i++) {
-
-uint32_t size = dp_packet_size(batch->packets[i]);
+struct dp_packet *packet = batch->packets[i];
+uint32_t size = dp_packet_size(packet);
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
@@ -1870,18 +1871,15 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 }
 
 pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-
-if (!pkts[txcnt]) {
+if (OVS_UNLIKELY(!pkts[txcnt])) {
 dropped += cnt - i;
 break;
 }
 
 /* We have to do a copy for now */
 memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(batch->packets[i]), size);
-
-rte_pktmbuf_data_len(pkts[txcnt]) = size;
-rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
+   dp_packet_data(packet), size);
+dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 
 txcnt++;
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 12/12] dpif-netdev: Fix comments for pmd_load_cached_ports.

2017-09-20 Thread Bhanuprakash Bodireddy
Commit 57eebbb4c315 replaces thread local 'pmd->port_cache' with
'pmd->tnl_port_cache' and 'pmd->send_port_cache' maps. Update the
comments accordingly.

Fixes: 57eebbb4c315 ("Don't try to output on a device without txqs")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 35b7a64..1b5420e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3933,7 +3933,9 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
 }
 
 /* Copies ports from 'pmd->tx_ports' (shared with the main thread) to
- * 'pmd->port_cache' (thread local) */
+ * thread-local copies. Copy to 'pmd->tnl_port_cache' if it is a tunnel
+ * device, otherwise to 'pmd->send_port_cache' if the port has at least
+ * one txq. */
 static void
 pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
 OVS_REQUIRES(pmd->port_mutex)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 09/12] netdev-dpdk: Minor cleanup of netdev_dpdk_send__.

2017-09-20 Thread Bhanuprakash Bodireddy
The variable 'cnt' is initialized and reused in multiple function calls
inside netdev_dpdk_send__() and is confusing sometimes. Instead introduce
'batch_cnt' to hold the original packet count and 'tx_cnt' to store
the final packet count resulting after filtering and qos operations.

Finally, 'tx_cnt' packets get transmitted on the respective 'qid'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 60887e5..3ff79c1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1938,17 +1938,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 dpdk_do_tx_copy(netdev, qid, batch);
 dp_packet_delete_batch(batch, may_steal);
 } else {
-int dropped;
-int cnt = batch->count;
+int tx_cnt, dropped;
+int batch_cnt = dp_packet_batch_size(batch);
 struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
 dp_packet_batch_apply_cutlen(batch);
 
-cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
-dropped = batch->count - cnt;
+tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
+tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
+dropped = batch_cnt - tx_cnt;
 
-dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
 
 if (OVS_UNLIKELY(dropped)) {
 rte_spinlock_lock(&dev->stats_lock);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 11/12] dpif-netdev: Remove 'cnt' in dp_netdev_input__().

2017-09-20 Thread Bhanuprakash Bodireddy
There is little use for the 'cnt' variable in dp_netdev_input__(). Get rid of
it and use dp_packet_batch_size() to initialize PKT_ARRAY_SIZE.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07fca44..35b7a64 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5100,9 +5100,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets,
   bool md_is_valid, odp_port_t port_no)
 {
-int cnt = packets->count;
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = cnt;
+const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 10/12] odp-execute: Use const qualifer for batch size.

2017-09-20 Thread Bhanuprakash Bodireddy
It is recommended to use the const qualifier for 'num', which tracks the
packet batch count. This way 'num' can't be modified by the iterator.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/odp-execute.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5f4d23a..3109f39 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -826,7 +826,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 break;
 }
 case OVS_ACTION_ATTR_DECAP_NSH: {
-size_t i, num = batch->count;
+size_t i;
+const size_t num = dp_packet_batch_size(batch);
 
 DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
 if (decap_nsh(packet)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 00/12] Use packet batch macro and APIs.

2017-09-20 Thread Bhanuprakash Bodireddy
The DP_PACKET_BATCH_FOR_EACH macro was introduced earlier this year as part
of enhancing the packet batch APIs. Commit '72c84bc2' implemented this macro,
replaced most of the call sites with it, and simplified the logic.
However, there are still many APIs that need to be fixed.

This patch series is a simple and straightforward set of changes
aimed at using DP_PACKET_BATCH_FOR_EACH macro at all appropriate places.

The second part of the patch series deals with code cleanup and renaming
a few variables to improve the readability of the code. The last patch in
this series fixes a wrong comment.

This series introduces no functional changes and has no performance impact.

v1->v2
 * Add patches from below to this series.
   https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338673.html

 * Group the patches by theme in the below order.
6 patches - Use DP_PACKET_BATCH_FOR_EACH
5 patches - Code Cleanup
1 patch   - Fix comment.

Bhanuprakash Bodireddy (12):
  conntrack: Use DP_PACKET_BATCH_FOR_EACH macro.
  netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in
netdev_linux_tap_batch_send.
  netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send
  netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.
  netdev-linux: Clean up netdev_linux_sock_batch_send().
  netdev-dpdk: Cleanup dpdk_do_tx_copy.
  netdev-dpdk: Minor cleanup of netdev_dpdk_send__.
  odp-execute: Use const qualifer for batch size.
  dpif-netdev: Remove 'cnt' in dp_netdev_input__().
  dpif-netdev: Fix comments for pmd_load_cached_ports.

 lib/conntrack.c| 13 ++---
 lib/dpif-netdev.c  | 37 ++---
 lib/netdev-bsd.c   |  7 ---
 lib/netdev-dpdk.c  | 40 +++-
 lib/netdev-linux.c | 17 +
 lib/odp-execute.c  |  3 ++-
 6 files changed, 58 insertions(+), 59 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 02/12] netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in netdev_linux_tap_batch_send.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_linux_tap_batch_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2b..6769819 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1232,8 +1232,8 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
 struct dp_packet_batch *batch)
 {
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 size_t size = dp_packet_get_send_len(packet);
 ssize_t retval;
 int error;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 03/12] netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_dpdk_ring_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 648d719..60ec528 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2942,14 +2942,14 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
   bool concurrent_txq)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-unsigned i;
+struct dp_packet *packet;
 
 /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that
  * the rss hash field is clear. This is because the same mbuf may be
  * modified by the consumer of the ring and return into the datapath
  * without recalculating the RSS hash. */
-for (i = 0; i < batch->count; i++) {
-dp_packet_mbuf_rss_flag_reset(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_mbuf_rss_flag_reset(packet);
 }
 
 netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 04/12] netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_bsd_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-bsd.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..96ba71c 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -685,6 +685,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
 struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
 const char *name = netdev_get_name(netdev_);
+struct dp_packet *packet;
 int error;
 int i;
 
@@ -695,9 +696,9 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 error = 0;
 }
 
-for (i = 0; i < batch->count; i++) {
-const void *data = dp_packet_data(batch->packets[i]);
-size_t size = dp_packet_get_send_len(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+const void *data = dp_packet_data(packet);
+size_t size = dp_packet_get_send_len(packet);
 
 while (!error) {
 ssize_t retval;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 01/12] conntrack: Use DP_PACKET_BATCH_FOR_EACH macro.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in conntrack_execute().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/conntrack.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 419cb1d..019092e 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1141,17 +1141,16 @@ conntrack_execute(struct conntrack *ct, struct 
dp_packet_batch *pkt_batch,
   long long now)
 {
 
-struct dp_packet **pkts = pkt_batch->packets;
-size_t cnt = pkt_batch->count;
+struct dp_packet *packet;
 struct conn_lookup_ctx ctx;
 
-for (size_t i = 0; i < cnt; i++) {
-if (!conn_key_extract(ct, pkts[i], dl_type, &ctx, zone)) {
-pkts[i]->md.ct_state = CS_INVALID;
-write_ct_md(pkts[i], zone, NULL, NULL, NULL);
+DP_PACKET_BATCH_FOR_EACH (packet, pkt_batch) {
+if (!conn_key_extract(ct, packet, dl_type, &ctx, zone)) {
+packet->md.ct_state = CS_INVALID;
+write_ct_md(packet, zone, NULL, NULL, NULL);
 continue;
 }
-process_one(ct, pkts[i], &ctx, zone, force, commit,
+process_one(ct, packet, &ctx, zone, force, commit,
 now, setmark, setlabel, nat_action_info, helper);
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 05/12] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in dp_netdev_run_meter().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..5000f7a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4121,10 +4121,11 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 {
 struct dp_meter *meter;
 struct dp_meter_band *band;
+struct dp_packet *packet;
 long long int long_delta_t; /* msec */
 uint32_t delta_t; /* msec */
 int i;
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 uint32_t bytes, volume;
 int exceeded_band[NETDEV_MAX_BURST];
 uint32_t exceeded_rate[NETDEV_MAX_BURST];
@@ -4157,8 +4158,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 meter->used = now;
 meter->packet_count += cnt;
 bytes = 0;
-for (i = 0; i < cnt; i++) {
-bytes += dp_packet_size(packets_->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+bytes += dp_packet_size(packet);
 }
 meter->byte_count += bytes;
 
@@ -4208,8 +4209,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 } else {
 /* Packet sizes differ, must process one-by-one. */
 band_exceeded_pkt = cnt;
-for (i = 0; i < cnt; i++) {
-uint32_t bits = dp_packet_size(packets_->packets[i]) * 8;
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+uint32_t bits = dp_packet_size(packet) * 8;
 
 if (band->bucket >= bits) {
 band->bucket -= bits;
@@ -4237,10 +4238,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 /* Fire the highest rate band exceeded by each packet.
  * Drop packets if needed, by swapping packet to the end that will be
  * ignored. */
-const size_t size = dp_packet_batch_size(packets_);
-struct dp_packet *packet;
 size_t j;
-DP_PACKET_BATCH_REFILL_FOR_EACH (j, size, packet, packets_) {
+DP_PACKET_BATCH_REFILL_FOR_EACH (j, cnt, packet, packets_) {
 if (exceeded_band[j] >= 0) {
 /* Meter drop packet. */
 band = &meter->bands[exceeded_band[j]];
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 08/12] netdev-dpdk: Cleanup dpdk_do_tx_copy.

2017-09-20 Thread Bhanuprakash Bodireddy
Cleanup dpdk_do_tx_copy().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 60ec528..60887e5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1835,22 +1835,23 @@ static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
+const size_t batch_cnt = dp_packet_batch_size(batch);
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = batch->count;
+const size_t PKT_ARRAY_SIZE = batch_cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-uint32_t cnt = batch->count;
+uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
 cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-  cnt, false);
-dropped += batch->count - cnt;
+  batch_cnt, false);
+dropped += batch_cnt - cnt;
 }
 
 dp_packet_batch_apply_cutlen(batch);
@@ -1858,8 +1859,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 uint32_t txcnt = 0;
 
 for (uint32_t i = 0; i < cnt; i++) {
-
-uint32_t size = dp_packet_size(batch->packets[i]);
+struct dp_packet *packet = batch->packets[i];
+uint32_t size = dp_packet_size(packet);
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(&rl, "Too big size %u max_packet_len %d",
@@ -1870,18 +1871,15 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 }
 
 pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-
-if (!pkts[txcnt]) {
+if (OVS_UNLIKELY(!pkts[txcnt])) {
 dropped += cnt - i;
 break;
 }
 
 /* We have to do a copy for now */
 memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(batch->packets[i]), size);
-
-rte_pktmbuf_data_len(pkts[txcnt]) = size;
-rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
+   dp_packet_data(packet), size);
+dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 
 txcnt++;
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 07/13] netdev-provider: Reorder elements in netdev structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in netdev structure, holes can be removed.

Before: structure size: 88, sum holes: 10, cachelines:2
After : structure size: 80, sum holes:  2, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-provider.h | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 5d7bd4f..1720deb 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -49,6 +49,12 @@ struct netdev {
  * opening this device, and therefore got assigned to the "system" class */
 bool auto_classified;
 
+/* If this is 'true', the user explicitly specified an MTU for this
+ * netdev.  Otherwise, Open vSwitch is allowed to override it. */
+bool mtu_user_config;
+
+int ref_cnt;/* Times this devices was opened. */
+
 /* A sequence number which indicates changes in one of 'netdev''s
  * properties.   It must be nonzero so that users have a value which
  * they may use as a reset when tracking 'netdev'.
@@ -67,16 +73,11 @@ struct netdev {
 struct seq *reconfigure_seq;
 uint64_t last_reconfigure_seq;
 
-/* If this is 'true', the user explicitly specified an MTU for this
- * netdev.  Otherwise, Open vSwitch is allowed to override it. */
-bool mtu_user_config;
-
 /* The core netdev code initializes these at netdev construction and only
  * provide read-only access to its client.  Netdev implementations may
  * modify them. */
 int n_txq;
 int n_rxq;
-int ref_cnt;/* Times this devices was opened. */
 struct shash_node *node;/* Pointer to element in global map. */
 struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/
 };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 05/13] netdev: Reorder elements in netdev_tunnel_config structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in netdev_tunnel_config structure, sum holes and
pad bytes can be reduced.

Before: structure size: 96, sum holes: 17, pad bytes: 4, cachelines:2
After : structure size: 80, sum holes:  5, pad bytes: 0, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev.h b/lib/netdev.h
index f8482f7..3a545fe 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -99,9 +99,9 @@ enum netdev_pt_mode {
 
 /* Configuration specific to tunnels. */
 struct netdev_tunnel_config {
+ovs_be64 in_key;
 bool in_key_present;
 bool in_key_flow;
-ovs_be64 in_key;
 
 bool out_key_present;
 bool out_key_flow;
@@ -115,8 +115,8 @@ struct netdev_tunnel_config {
 struct in6_addr ipv6_dst;
 
 uint32_t exts;
-bool set_egress_pkt_mark;
 uint32_t egress_pkt_mark;
+bool set_egress_pkt_mark;
 
 uint8_t ttl;
 bool ttl_inherit;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


<    1   2   3   >