[ovs-dev] [RFC PATCH 2/2] dpif-netdev: Avoid recirculation during vxlan decap.

2018-01-12 Thread Bhanuprakash Bodireddy
This commit avoids datapath recirculation during packet decapsulation
by combining actions at upcall handling.

This patch uses the PTYPEs feature provided by NICs to detect
tunnel packets and combines the actions during upcall processing, thereby
skipping recirculation.

If PTYPEs aren't supported by a NIC on the DUT, or if the actions can't be
combined due to complex flow rules, this patch falls back to the previous
recirculation approach.

Test results: decap-only test with 118 byte packets.

  CFLAGS="-O3 -march=native"
OvS Master(491e05c2)   5.336 Mpps
OvS Master + PATCH 6.224 Mpps

  CFLAGS="-g -O2"
OvS Master(491e05c2)   4.903 Mpps
OvS Master + PATCH 5.824 Mpps

The performance gain is approximately 15%.

Suggested-by: Sugesh Chandran <sugesh.chand...@intel.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 193 +++---
 lib/dpif.c|   3 +-
 lib/dpif.h|   2 +-
 lib/netdev-native-tnl.c   |   8 ++
 lib/netdev-native-tnl.h   |   3 +
 lib/odp-execute.c |   2 +-
 lib/odp-execute.h |   3 +-
 ofproto/ofproto-dpif-upcall.c |  16 ++--
 8 files changed, 190 insertions(+), 40 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a34a1c5..c786578 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -54,6 +54,7 @@
 #include "netdev.h"
 #include "netdev-vport.h"
 #include "netlink.h"
+#include "netdev-native-tnl.h"
 #include "odp-execute.h"
 #include "odp-util.h"
 #include "openvswitch/dynamic-string.h"
@@ -4849,7 +4850,8 @@ static int
 dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct dp_packet *packet_,
  struct flow *flow, struct flow_wildcards *wc, ovs_u128 *ufid,
  enum dpif_upcall_type type, const struct nlattr *userdata,
- struct ofpbuf *actions, struct ofpbuf *put_actions)
+ struct ofpbuf *actions, struct ofpbuf *put_actions,
+ bool install_flow)
 {
 struct dp_netdev *dp = pmd->dp;
 
@@ -4883,7 +4885,8 @@ dp_netdev_upcall(struct dp_netdev_pmd_thread *pmd, struct 
dp_packet *packet_,
 }
 
 return dp->upcall_cb(packet_, flow, ufid, pmd->core_id, type, userdata,
- actions, wc, put_actions, dp->upcall_aux);
+ actions, wc, put_actions, dp->upcall_aux,
+ install_flow);
 }
 
 static inline uint32_t
@@ -5096,6 +5099,109 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 return dp_packet_batch_size(packets_);
 }
 
+static bool
+has_tunnel_pop_action(struct ofpbuf *actions, odp_port_t *portno)
+{
+int pop_act_cnt = 0;
+int act_num = 0;
+int act_index = 0;
+
+const struct nlattr *a;
+size_t left;
+NL_ATTR_FOR_EACH_UNSAFE (a, left, actions->data, actions->size) {
+act_num++;
+if (nl_attr_type(a) == OVS_ACTION_ATTR_TUNNEL_POP) {
+*portno = nl_attr_get_odp_port(a);
+act_index = act_num;
+pop_act_cnt++;
+}
+}
+
+/* Return true if actions has pop action and is the last action
+ * in the list. */
+if ((pop_act_cnt == 1) && (act_index == act_num)) {
+return true;
+}
+
+return false;
+}
+
+static inline int
+handle_packet_tnl_upcall(struct dp_netdev_pmd_thread *pmd,
+ struct dp_packet *packet, struct match *match,
+ ovs_u128 *ufid, const struct netdev_flow_key *key,
+ struct ofpbuf *actions, struct ofpbuf *put_actions,
+ int *lost_cnt)
+{
+int error;
+
+/* Process tnl header:
+ *   (a) Take temporary copy of packet.
+ *   (b) Reset tunnel ip_dst field so that miniflow extraction
+ *   would skip parsing pre-extracted tunnel info in metadata.
+ *   Tunnel header is popped in packet_mf_extract(), the
+ *   inner header and data is extracted now.
+ *   (c) Invoke upcall processing and store the actions in
+ *   tmp_actions, which later get copied in to actions.
+ */
+struct dp_packet tmp_pkt;
+memcpy(_pkt, packet, sizeof(struct dp_packet));
+
+tmp_pkt.md.tunnel.ip_dst = 0;
+struct netdev_flow_key tkey;
+miniflow_extract(_pkt, );
+miniflow_expand(, >flow);
+
+uint64_t actions_stub[512 / 8];
+struct ofpbuf tmp_actions;
+ofpbuf_use_stub(_actions, actions_stub, sizeof actions_stub);
+error = dp_netdev_upcall(pmd, packet, >flow, >wc,
+ ufid, DPIF_UC_MISS, NULL, _actions,
+ put_actions, false);
+if (OVS_UNLIKELY(error && error != ENOSPC)) {
+dp_packet_delete(packet);
+(*lost_cnt)++;
+return error;
+} else {
+/* Append the actio

[ovs-dev] [RFC PATCH 1/2] dpif-netdev: Refactor the miniflow extraction logic using PTYPEs.

2018-01-12 Thread Bhanuprakash Bodireddy
This commit refactors the miniflow extraction logic based on PTYPEs.

If the NIC supports PTYPEs, the packet_type field is populated and
the PTYPE value can be used to check if the packet is a tunnel packet.
In the case of a tunnel packet, tunnel information is populated into the
packet metadata and miniflow extraction is performed. This is used
by a future commit that skips recirculation on vxlan decapsulation.

If PTYPEs aren't supported by a NIC, this patch doesn't do anything.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dp-packet.h   | 62 ++-
 lib/dpif-netdev.c | 29 --
 lib/flow.c| 38 ++
 lib/flow.h|  1 +
 4 files changed, 127 insertions(+), 3 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b4b721c..9ff42d7 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -80,6 +80,11 @@ struct dp_packet {
 };
 };
 
+struct ptype_offsets {
+uint16_t l3_ofs;
+uint16_t l4_ofs;
+};
+
 static inline void *dp_packet_data(const struct dp_packet *);
 static inline void dp_packet_set_data(struct dp_packet *, void *);
 static inline void *dp_packet_base(const struct dp_packet *);
@@ -567,10 +572,16 @@ dp_packet_set_data(struct dp_packet *b, void *data)
 }
 
 static inline void
-dp_packet_reset_packet(struct dp_packet *b, int off)
+__dp_packet_reset_packet(struct dp_packet *b, int off)
 {
 dp_packet_set_size(b, dp_packet_size(b) - off);
 dp_packet_set_data(b, ((unsigned char *) dp_packet_data(b) + off));
+}
+
+static inline void
+dp_packet_reset_packet(struct dp_packet *b, int off)
+{
+__dp_packet_reset_packet(b, off);
 dp_packet_reset_offsets(b);
 }
 
@@ -691,6 +702,55 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+static inline bool
+dp_packet_is_encapsulated(struct dp_packet *p OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+return RTE_ETH_IS_TUNNEL_PKT(p->mbuf.packet_type);
+#else
+return false;
+#endif
+}
+
+/*
+ * To use ptypes
+ *  82599ES(Niantic)
+ *Disable vectorization. (Requires CONFIG_RTE_IXGBE_INC_VECTOR=n
+ *   in config/common_base.)
+ *  XL710/X710(FVL)
+ *Firmware version >= 5.04 for correct ptype indentification.
+ */
+static inline bool
+dp_packet_is_tunnel(struct dp_packet *p OVS_UNUSED,
+struct ptype_offsets *pt_ofs OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+#define VXLAN_DST_PORT 4789
+#define ETH_HDR_LEN 14
+uint32_t ptype = p->mbuf.packet_type;
+
+/* XXX: Use dp_packet_is_encapsulated(dp_packet) when
+ * ptypes is fixed in future. */
+if ((ptype & RTE_PTYPE_L4_MASK) == RTE_PTYPE_L4_UDP) {
+uint32_t hdr_len = (ptype & RTE_PTYPE_L3_IPV4) ?
+   ETH_HDR_LEN + IP_HEADER_LEN :
+   ETH_HDR_LEN + IPV6_HEADER_LEN;
+
+if (pt_ofs) {
+pt_ofs->l3_ofs = ETH_HDR_LEN;
+pt_ofs->l4_ofs = hdr_len;
+}
+struct udp_header *udp_hdr = dp_packet_at(p, hdr_len, UDP_HEADER_LEN);
+if (udp_hdr && udp_hdr->udp_dst == htons(VXLAN_DST_PORT)) {
+return true;
+}
+}
+return false;
+#else
+return false;
+#endif
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ef8ebf2..a34a1c5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4987,6 +4987,30 @@ dp_netdev_queue_batches(struct dp_packet *pkt,
 packet_batch_per_flow_update(batch, pkt, mf);
 }
 
+static void
+packet_mf_extract(struct dp_packet *pkt, struct netdev_flow_key *key)
+{
+struct ptype_offsets pt_ofs;
+if (dp_packet_is_tunnel(pkt, _ofs)) {
+   /* pkt->l3_ofs & pkt->l4_ofs should be valid before invoking
+* netdev_vxlan_pop_header. pkt->l3_ofs, pkt->l4_ofs are set
+* as part of miniflow extraction. This means dp_packet_l3() and
+* dp_packet_l4() works only if mf extraction is invoked early during
+* packet processing.
+*
+* In this case pop header operation should be performed before
+* mf extraction, so set the dp_packet offsets from the pt_ofs.
+*/
+pkt->l3_ofs = pt_ofs.l3_ofs;
+pkt->l4_ofs = pt_ofs.l4_ofs;
+tunnel_flow_extract(pkt, >mf);
+} else {
+miniflow_extract(pkt, >mf);
+}
+
+key->len = 0; /* Not computed yet. */
+}
+
 /* Try to process all ('cnt') the 'packets' using only the exact match cache
  * 'pmd->flow_cache'. If a flow is not found for a packet 'packets[i]', the
  * miniflow is copied into 'keys' and the packet pointer is moved at the
@@ -5037,8 +5061,9 @@ emc_processing(struct dp_netdev_pmd_thread *pmd,
 if (!md_is_valid) {
 pkt_metadata_init(>md, port_

[ovs-dev] [PATCH] netdev-native-tnl: Add assertion in vxlan_pop_header.

2018-01-12 Thread Bhanuprakash Bodireddy
During tunnel decapsulation the below steps are performed:
 [1] Tunnel information is populated in packet metadata i.e packet->md->tunnel.
 [2] Outer header gets popped.
 [3] Packet is recirculated.

For [1] to work, the dp_packet L3 and L4 header offsets should be valid.
The offsets in the dp_packet are set as part of miniflow extraction.

If the offsets are accidentally reset, or the pop header operation is performed
prior to miniflow extraction, step [1] fails silently and creates
issues that are hard to debug. Add assertions to check that the
offsets are valid.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-native-tnl.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/netdev-native-tnl.c b/lib/netdev-native-tnl.c
index 9ce8567..fb5eab0 100644
--- a/lib/netdev-native-tnl.c
+++ b/lib/netdev-native-tnl.c
@@ -508,6 +508,9 @@ netdev_vxlan_pop_header(struct dp_packet *packet)
 ovs_be32 vx_flags;
 enum packet_type next_pt = PT_ETH;
 
+ovs_assert(packet->l3_ofs > 0);
+ovs_assert(packet->l4_ofs > 0);
+
 pkt_metadata_init_tnl(md);
 if (VXLAN_HLEN > dp_packet_l4_size(packet)) {
 goto err;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/4] doc: Update configure section with prefetchwt1 details.

2018-01-12 Thread Bhanuprakash Bodireddy
In spite of specifying -march=native when using Low Temporal Write (OPCH_LTW),
the compiler generates the 'prefetchw' instruction instead of the 'prefetchwt1'
instruction available on the processor, as in 'Case B'. To make the compiler emit
the prefetchwt1 instruction, -mprefetchwt1 needs to be passed to configure
explicitly.

[Problem]
  Case A:
OVS_PREFETCH_CACHE(addr, OPCH_HTW)  [__builtin_prefetch(addr, 1, 3)]
[Assembly]
leaq-112(%rbp), %rax
prefetchw  (%rax)

  Case B:
OVS_PREFETCH_CACHE(addr, OPCH_LTW)  [__builtin_prefetch(addr, 1, 1)]
[Assembly]
leaq-112(%rbp), %rax
prefetchw  (%rax) <***problem***>

[Solution]
   ./configure CFLAGS="-g -O2 -mprefetchwt1"

  Case B:
OVS_PREFETCH_CACHE(addr, OPCH_LTW)  [__builtin_prefetch(addr, 1, 1)]
[Assembly]
leaq-112(%rbp), %rax
prefetchwt1  (%rax)

See also:
https://mail.openvswitch.org/pipermail/ovs-dev/2017-December/341591.html

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/intro/install/general.rst | 13 +
 1 file changed, 13 insertions(+)

diff --git a/Documentation/intro/install/general.rst 
b/Documentation/intro/install/general.rst
index 718e5c2..4d2db45 100644
--- a/Documentation/intro/install/general.rst
+++ b/Documentation/intro/install/general.rst
@@ -280,6 +280,19 @@ With this, GCC will detect the processor and automatically 
set appropriate
 flags for it. This should not be used if you are compiling OVS outside the
 target machine.
 
+Compilers(gcc) won't emit prefetchwt1 instruction even with '-march=native'
+specified. In such case, -mprefetchwt1 needs to be explicitly passed during
+configuration.
+
+For example inspite of specifying -march=native when using Low Temporal Write
+i.e OVS_PREFETCH_CACHE(addr, OPCH_LTW), the compiler generates 'prefetchw'
+instruction instead of 'prefetchwt1' instruction available on processor.
+
+To make the compiler generate the appropriate instruction, it is recommended
+to pass ``-mprefetchwt1`` settings::
+
+$ ./configure CFLAGS="-g -O2 -march=native -mprefetchwt1"
+
 .. note::
   CFLAGS are not applied when building the Linux kernel module. Custom CFLAGS
   for the kernel module are supplied using the ``EXTRA_CFLAGS`` variable when
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/4] util: Use OPCH_NTR type while prefetching packet metadata.

2018-01-12 Thread Bhanuprakash Bodireddy
OVS_PREFETCH by default uses OPCH_HTR(High Temporal Read), meaning
the prefetch is in preparation for a future read and the prefetched
data is made available in all levels of caches.

However, pkt_metadata_prefetch_init() prefetches the metadata so that
the data is readily available when pkt_metadata_init() zeroes it
out. So a 'write' operation is actually performed on the prefetched data
instead of the anticipated 'read'.

Doing a 'write' isn't a problem, as the metadata isn't shared between
threads and doesn't need invalidation across other cores. A read
prefetch with a *non-temporal* reference is therefore enough, and it avoids
polluting the cache. This change seems to positively affect performance.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/packets.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/packets.h b/lib/packets.h
index 9a71aa3..5f50fe2 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -166,14 +166,14 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
 {
 /* Prefetch cacheline0 as members till ct_state and odp_port will
  * be initialized later in pkt_metadata_init(). */
-OVS_PREFETCH(md->cacheline0);
+OVS_PREFETCH_CACHE(md->cacheline0, OPCH_NTR);
 
 /* Prefetch cacheline1 as members of this cacheline will be zeroed out
  * in pkt_metadata_init_tnl(). */
-OVS_PREFETCH(md->cacheline1);
+OVS_PREFETCH_CACHE(md->cacheline1, OPCH_NTR);
 
 /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
-OVS_PREFETCH(md->cacheline2);
+OVS_PREFETCH_CACHE(md->cacheline2, OPCH_NTR);
 }
 
 bool dpid_from_string(const char *s, uint64_t *dpidp);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/4] util: Extend ovs_prefetch_range to include prefetch type.

2018-01-12 Thread Bhanuprakash Bodireddy
With ovs_prefetch_range(), large amounts of data can be prefetched into the
caches. The prefetch type gives better control over the data caching strategy,
i.e. where the data should be prefetched (L1/L2/L3) and whether the data
reference is temporal or non-temporal.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/pvector.h | 6 --
 lib/util.h| 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/pvector.h b/lib/pvector.h
index b175b21..d5655f0 100644
--- a/lib/pvector.h
+++ b/lib/pvector.h
@@ -177,7 +177,8 @@ pvector_cursor_init(const struct pvector *pvec,
 
 impl = ovsrcu_get(struct pvector_impl *, >impl);
 
-ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
+ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0],
+   OPCH_HTR);
 
 cursor.size = impl->size;
 cursor.vector = impl->vector;
@@ -208,7 +209,8 @@ static inline void pvector_cursor_lookahead(const struct 
pvector_cursor *cursor,
 int n, size_t size)
 {
 if (cursor->entry_idx + n < cursor->size) {
-ovs_prefetch_range(cursor->vector[cursor->entry_idx + n].ptr, size);
+ovs_prefetch_range(cursor->vector[cursor->entry_idx + n].ptr, size,
+   OPCH_HTR);
 }
 }
 
diff --git a/lib/util.h b/lib/util.h
index b6639b8..0a8ae23 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -73,13 +73,13 @@ BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
 typedef uint8_t OVS_CACHE_LINE_MARKER[1];
 
 static inline void
-ovs_prefetch_range(const void *start, size_t size)
+ovs_prefetch_range(const void *start, size_t size, enum ovs_prefetch_type type)
 {
 const char *addr = (const char *)start;
 size_t ofs;
 
 for (ofs = 0; ofs < size; ofs += CACHE_LINE_SIZE) {
-OVS_PREFETCH(addr + ofs);
+OVS_PREFETCH_CACHE(addr + ofs, type);
 }
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/4] compiler: Introduce OVS_PREFETCH variants.

2018-01-12 Thread Bhanuprakash Bodireddy
This commit introduces prefetch variants by using the GCC built-in
prefetch function.

The prefetch variants give the user better control over designing a data
caching strategy in order to increase cache efficiency and minimize
cache pollution. Data reference patterns here can be classified into

 - Non-temporal(NT) - Data that is referenced once and not reused in
  immediate future.
 - Temporal - Data will be used again soon.

The Macro variants can be used where there are
 - Predictable memory access patterns.
 - Execution pipeline can stall if data isn't available.
 - Time consuming loops.

For example:

  OVS_PREFETCH_CACHE(addr, OPCH_LTR)
- OPCH_LTR : OVS PREFETCH CACHE HINT-LOW TEMPORAL READ.
- __builtin_prefetch(addr, 0, 1)
- Prefetch data in to L3 cache for readonly purpose.

  OVS_PREFETCH_CACHE(addr, OPCH_HTW)
- OPCH_HTW : OVS PREFETCH CACHE HINT-HIGH TEMPORAL WRITE.
- __builtin_prefetch(addr, 1, 3)
- Prefetch data in to all caches in anticipation of write. In doing
  so it invalidates other cached copies so as to gain 'exclusive'
  access.

  OVS_PREFETCH(addr)
- OPCH_HTR : OVS PREFETCH CACHE HINT-HIGH TEMPORAL READ.
- __builtin_prefetch(addr, 0, 3)
- Prefetch data in to all caches in anticipation of read and that
  data will be used again soon (HTR - High Temporal Read).

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 include/openvswitch/compiler.h | 147 ++---
 1 file changed, 139 insertions(+), 8 deletions(-)

diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
index c7cb930..94bb24d 100644
--- a/include/openvswitch/compiler.h
+++ b/include/openvswitch/compiler.h
@@ -222,18 +222,149 @@
 static void f(void)
 #endif
 
-/* OVS_PREFETCH() can be used to instruct the CPU to fetch the cache
- * line containing the given address to a CPU cache.
- * OVS_PREFETCH_WRITE() should be used when the memory is going to be
- * written to.  Depending on the target CPU, this can generate the same
- * instruction as OVS_PREFETCH(), or bring the data into the cache in an
- * exclusive state. */
 #if __GNUC__
-#define OVS_PREFETCH(addr) __builtin_prefetch((addr))
-#define OVS_PREFETCH_WRITE(addr) __builtin_prefetch((addr), 1)
+enum cache_locality {
+NON_TEMPORAL_LOCALITY,
+LOW_TEMPORAL_LOCALITY,
+MODERATE_TEMPORAL_LOCALITY,
+HIGH_TEMPORAL_LOCALITY
+};
+
+enum cache_rw {
+PREFETCH_READ,
+PREFETCH_WRITE
+};
+
+/* The prefetch variants gives the user better control on designing data
+ * caching strategy in order to increase cache efficiency and minimize
+ * cache pollution. Data reference patterns here can be classified in to
+ *
+ *   Non-temporal(NT) - Data that is referenced once and not reused in
+ *  immediate future.
+ *   Temporal - Data will be used again soon.
+ *
+ * The Macro variants can be used where there are
+ *   o Predictable memory access patterns.
+ *   o Execution pipeline can stall if data isn't available.
+ *   o Time consuming loops.
+ *
+ * OVS_PREFETCH_CACHE() can be used to instruct the CPU to fetch the cache
+ * line containing the given address to a CPU cache. The second argument
+ * OPCH_XXR (or) OPCH_XXW is used to hint if the prefetched data is going
+ * to be read or written to by core.
+ *
+ * Example Usage:
+ *
+ *   OVS_PREFETCH_CACHE(addr, OPCH_LTR)
+ *   - OPCH_LTR : OVS PREFETCH CACHE HINT-LOW TEMPORAL READ.
+ *   - __builtin_prefetch(addr, 0, 1)
+ *   - Prefetch data in to L3 cache for readonly purpose.
+ *
+ *   OVS_PREFETCH_CACHE(addr, OPCH_HTW)
+ *   - OPCH_HTW : OVS PREFETCH CACHE HINT-HIGH TEMPORAL WRITE.
+ *   - __builtin_prefetch(addr, 1, 3)
+ *   - Prefetch data in to all caches in anticipation of write. In doing
+ * so it invalidates other cached copies so as to gain 'exclusive'
+ * access.
+ *
+ *   OVS_PREFETCH(addr)
+ *   - OPCH_HTR : OVS PREFETCH CACHE HINT-HIGH TEMPORAL READ.
+ *   - __builtin_prefetch(addr, 0, 3)
+ *   - Prefetch data in to all caches in anticipation of read and that
+ * data will be used again soon (HTR - High Temporal Read).
+ *
+ * Implementation details of prefetch hint instructions may vary across
+ * different processors and microarchitectures.
+ *
+ * OPCH_NTW, OPCH_LTW, OPCH_MTW uses prefetchwt1 instruction and OPCH_HTW
+ * uses prefetchw instruction when available. Refer Documentation on how
+ * to enable prefetchwt1 instruction.
+ *
+ * PREFETCH HINTInstruction GCC builtin function
+ * ---
+ *   OPCH_NTR   prefetchnta  __builtin_prefetch(a, 0, 0)
+ *   OPCH_LTR   prefetcht2   __builtin_prefetch(a, 0, 1)
+ *   OPCH_MTR   prefetcht1   __builtin_prefetch(a, 0, 2)
+ *   OPCH_HTR   prefetcht0   __builtin_prefetch(a, 0, 3)
+ *
+ *   OPCH_NTW   prefetchwt1  __builtin_prefetch(a

[ovs-dev] [PATCH] doc: Update configure section with popcnt details.

2018-01-12 Thread Bhanuprakash Bodireddy
The popcnt instruction can be used to speed up hash computation on processors
with POPCNT support.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/intro/install/general.rst | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/Documentation/intro/install/general.rst 
b/Documentation/intro/install/general.rst
index 1485bdc..718e5c2 100644
--- a/Documentation/intro/install/general.rst
+++ b/Documentation/intro/install/general.rst
@@ -265,6 +265,12 @@ intrinsics can be used by passing ``-msse4.2``::
 
 $ ./configure CFLAGS="-g -O2 -msse4.2"`
 
+Also builtin popcnt instruction can be used to speedup the counting of the
+bits set in an integer. For example on X86_64 with POPCNT support, it can be
+enabled by passing ``-mpopcnt``::
+
+$ ./configure CFLAGS="-g -O2 -mpopcnt"`
+
 If you are on a different processor and don't know what flags to choose, it is
 recommended to use ``-march=native`` settings::
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V2] dpif-netdev: Allocate dp_netdev_pmd_thread struct by xzalloc_cacheline.

2018-01-12 Thread Bhanuprakash Bodireddy
All instances of struct dp_netdev_pmd_thread are allocated by xzalloc,
which doesn't guarantee that the memory is aligned on a
CACHE_LINE_SIZE boundary. Due to this, any padding done inside
the structure with this assumption might create holes.

This commit replaces xzalloc, free with xzalloc_cacheline and
free_cacheline. With the changes the memory is 64 byte aligned.

Before: With xzalloc, all the memory is 16 byte aligned.

(gdb) p pmd
$1 = (struct dp_netdev_pmd_thread *) 0x7eff8a813010

After: With xzalloc_cacheline, all the memory is 64 byte aligned.

(gdb) p pmd
$1 = (struct dp_netdev_pmd_thread *) 0x7f39e2365040

Reported-by: Ilya Maximets 
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
v1 -> v2
  * Allocate memory for non-pmd thread using xzalloc_cacheline().

 lib/dpif-netdev.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c7d157a..ef8ebf2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3732,7 +3732,7 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
 FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
 pmd = dp_netdev_get_pmd(dp, core->core_id);
 if (!pmd) {
-pmd = xzalloc(sizeof *pmd);
+pmd = xzalloc_cacheline(sizeof *pmd);
 dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
 pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
 VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
@@ -4565,7 +4565,7 @@ dp_netdev_set_nonpmd(struct dp_netdev *dp)
 {
 struct dp_netdev_pmd_thread *non_pmd;
 
-non_pmd = xzalloc(sizeof *non_pmd);
+non_pmd = xzalloc_cacheline(sizeof *non_pmd);
 dp_netdev_configure_pmd(non_pmd, dp, NON_PMD_CORE_ID, OVS_NUMA_UNSPEC);
 }
 
@@ -4665,7 +4665,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
 xpthread_cond_destroy(>cond);
 ovs_mutex_destroy(>cond_mutex);
 ovs_mutex_destroy(>port_mutex);
-free(pmd);
+free_cacheline(pmd);
 }
 
 /* Stops the pmd thread, removes it from the 'dp->poll_threads',
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] packets: Prefetch the packet metadata in cacheline1.

2018-01-12 Thread Bhanuprakash Bodireddy
pkt_metadata_prefetch_init() is used to prefetch the packet metadata
before initializing the metadata in pkt_metadata_init(). This is done
for every packet in userspace datapath and is performance critical.

Commit 99fc16c0 prefetches only cacheline0 and cacheline2, as the metadata
part of the respective cachelines will be initialized by pkt_metadata_init().

However in VXLAN case when popping the vxlan header, netdev_vxlan_pop_header()
invokes pkt_metadata_init_tnl() which zeroes out metadata part of
cacheline1 that wasn't prefetched earlier and causes performance
degradation.

By prefetching cacheline1, 9% performance improvement is observed with
vxlan decapsulation test case for packet sizes of 118 bytes. Performance
variation is observed based on CFLAGS.

   CFLAGS="-O2"CFLAGS="-O2 -msse4.2"
  Master  4.667 Mpps Master   4.710 Mpps
  With Patch  5.045 Mpps With Patch   5.097 Mpps

  CFLAGS="-O2 -march=native" CFLAGS="-Ofast -march=native"
  Master  5.072 Mpps Master   5.349 Mpps
  With Patch  5.193 Mpps With Patch   5.378 Mpps

Fixes: 99fc16c0 ("Reorganize the pkt_metadata structure.")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Acked-by: Sugesh Chandran <sugesh.chand...@intel.com>
---
v2->v3
  * Added Ack from Sugesh.

v1->v2
  * Include the throughput stats with different CFLAG options.

 lib/packets.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/packets.h b/lib/packets.h
index f583e05..9a71aa3 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -159,7 +159,8 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 }
 
 /* This function prefetches the cachelines touched by pkt_metadata_init()
- * For performance reasons the two functions should be kept in sync. */
+ * and pkt_metadata_init_tnl().  For performance reasons the two functions
+ * should be kept in sync. */
 static inline void
 pkt_metadata_prefetch_init(struct pkt_metadata *md)
 {
@@ -167,6 +168,10 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
  * be initialized later in pkt_metadata_init(). */
 OVS_PREFETCH(md->cacheline0);
 
+/* Prefetch cacheline1 as members of this cacheline will be zeroed out
+ * in pkt_metadata_init_tnl(). */
+OVS_PREFETCH(md->cacheline1);
+
 /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
 OVS_PREFETCH(md->cacheline2);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 8/8] NEWS: Add keepalive support information in NEWS.

2017-12-08 Thread Bhanuprakash Bodireddy
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 NEWS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS b/NEWS
index 188a075..6fa69ed 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,8 @@ Post-v2.8.0
  * Add support for compiling OVS with the latest Linux 4.13 kernel
- "flush-conntrack" in ovs-dpctl and ovs-appctl now accept a 5-tuple to
  delete a specific connection tracking entry.
+   - Userspace Datapath:
+ * Added Keepalive support for userspace datapath.
 
 v2.8.0 - 31 Aug 2017
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 7/8] Documentation: Update DPDK doc with Keepalive feature.

2017-12-08 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing threads by dispatching heartbeats at regular intervals.

The implementation uses OvSDB for reporting the health of the PMD threads.
Any external monitoring application can query the OvSDB for status
at regular intervals (or) subscribe to OvSDB updates.

The keepalive feature can be enabled through the OVSDB settings below.

enable-keepalive=true
  - Keepalive feature is disabled by default and should be enabled
at startup before ovs-vswitchd daemon is started.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, an 'ovs-keepalive' thread is spawned that wakes
up at regular intervals to update the timestamp and status of the pmd
threads in the process map. This information is read by the vswitchd thread
and written into the 'keepalive' column of the Open_vSwitch table in OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, collectd is notified and will eventually relay the status
to the ceilometer service running in the controller. Below is a high-level
overview of the deployment model.

Compute NodeControllerCompute Node

Collectd  <--> Ceilometer <>   Collectd

OvS DPDK   OvS DPDK

+-+
| VM  |
+--+--+
   \---+---/
   |
+--+---+   ++--+ +--+---+
| OVS  |-> |   ovsevents plugin| --> |   collectd   |
+--+---+   ++--+ +--+---+

+--+-+ +---++ |
| Ceilometer | <-- | collectd ceilometer plugin |  <---
+--+-+ +---++

Performance impact:
  No noticeable performance or latency impact is observed with
  KA feature enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/howto/dpdk.rst | 112 +++
 1 file changed, 112 insertions(+)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d123819..e7a2b27 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -439,6 +439,118 @@ For certain traffic profiles with many parallel flows, 
it's recommended to set
 
 For more information on the EMC refer to :doc:`/intro/install/dpdk` .
 
+.. _dpdk_keepalive:
+
+Keepalive
+-
+
+OvS Keepalive(KA) feature is disabled by default. To enable KA feature::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true
+
+The KA feature can't be enabled at run time and should be done at startup
+before ovs-vswitchd daemon is started.
+
+The default timer interval for monitoring packet processing threads is 1000ms.
+To set a different timer value, run::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . \
+other_config:keepalive-interval="5000"
+
+The events comprise of thread states and the last seen timestamps. The events
+are written in to process map periodically by keepalive thread.
+
+The events in the process map are retrieved by main(vswitchd) thread and
+updated in to keepalive column of Open_vSwitch table in OVSDB. Any external
+monitoring application can read the status from OVSDB at intervals or subscribe
+to the updates so that they get notified when the changes happen on OvSDB.
+
+To monitor the datapath status using ovsdb-client, run::
+
+$ ovsdb-client monitor Open_vSwitch
+$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive
+
+The datapath thread states are explained below::
+
+  KA_STATE_UNUSED  - Not registered to KA framework.
+  KA_STATE_ALIVE   - Thread alive.
+  KA_STATE_MISSING - Thread missed first heartbeat.
+  KA_STATE_DEAD- Thread missed two heartbeats.
+  KA_STATE_GONE- Thread missed two or more heartbeats and burried.
+  KA_STATE_SLEEP   - Thread is sleeping.
+
+To query the datapath status, run::
+
+$ ovs-appctl keepalive/pmd-health-show
+
+`collectd <https://collectd.org/>`__ has built-in support for DPDK and provides
+a `ovs_events` and `ovs_stats` plugin that can be enabled to relay the datapath
+status and the PMD status to OpenStack service `Ceilometer
+<https://docs.openstack.org/developer/ceilometer/>`__.
+
+To install and configure `collectd`, run::
+
+# Clone collectd from Git repository
+$ git clone https://github.com/collectd/collectd.git
+
+# configure and install collectd
+$ cd collectd
+$ ./build.sh
+$ ./configure --enable-syslog --enable-logfile --with-libdpdk=/usr
+$ make
+$ make install
+

[ovs-dev] [PATCH v6 5/8] bridge: Update keepalive status in OVSDB.

2017-12-08 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

row                                  action keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  {
"pmd62"="ALIVE,0,9226457935188922"
"pmd63"="ALIVE,1,150678618"
"pmd64"="ALIVE,2,150678618"
"pmd65"="ALIVE,3,150678618"}

 new  {
"pmd62"="ALIVE,0,9226460230167364"
"pmd63"="ALIVE,1,150679619"
"pmd64"="ALIVE,2,150679619"
"pmd65"="ALIVE,3,150679619"}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 7d3dbad..14ac093 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -347,6 +347,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 2bae8f1..e84646a 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -101,6 +101,7 @@ void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index f70407f..55c925e 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2686,6 +2688,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3039,6 +3064,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 6/8] keepalive: Add support to query keepalive status and statistics.

2017-12-08 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive status and statistics.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status   : Enabled
keepalive interval : 1000 ms
keepalive init time: 21 Aug 2017 16:20:31
PMD threads: 4

 PMD     CORE    STATE   LAST SEEN TIMESTAMP(UTC)
pmd62     0      ALIVE   21 Aug 2017 16:29:31
pmd63     1      ALIVE   21 Aug 2017 16:29:31
pmd64     2      ALIVE   21 Aug 2017 16:29:31
pmd65     3      GONE    21 Aug 2017 16:26:31

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 101 
 1 file changed, 101 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 14ac093..75c0884 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -18,11 +18,13 @@
 
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -362,6 +364,99 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status   : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval : %"PRIu32" ms\n",
+  get_ka_interval());
+
+char *utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+ka_info.init_time, true);
+ds_put_format(, "keepalive init time: %s \n", utc);
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads: %"PRIu32" \n",
+  ka_info->thread_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP(UTC)\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+pinfo->last_seen_time, true);
+
+ds_put_format(, "%s\t%2d\t%s\t%s\n",
+  pinfo->name, pinfo->core_id, state, utc);
+
+free(utc);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
@@ -424,6 +519,12 @@ ka_init(const struct smap *ovs_other_config)
 
 ka_info.init_time = time_wall_msec();
 
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
+
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, _info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 4/8] keepalive: Retrieve PMD status periodically.

2017-12-08 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: pmdid="status,core id,last_seen_timestamp(epoch)"
  eg: pmd62="ALIVE,2,150332575"
  pmd63="GONE,3,150332525"

The status is periodically retrieved by the keepalive thread and stored in the
keepalive_stats struct, which is later retrieved by the vswitchd thread.
In case of four PMD threads the status is as below:

   "pmd62"="ALIVE,0,150332575"
   "pmd63"="ALIVE,1,150332575"
   "pmd64"="ALIVE,2,150332575"
   "pmd65"="ALIVE,3,150332575"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 63 +++
 lib/keepalive.h   |  1 +
 3 files changed, 65 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 9021906..e9fa3c1 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1039,6 +1039,7 @@ ovs_keepalive(void *f_)
 /* Dispatch heartbeats only if pmd[s] exist. */
 if (hb_enable) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 xnanosleep(interval * 1000 * 1000);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 0e4b2b6..7d3dbad 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
@@ -29,6 +30,9 @@ static bool keepalive_enable = false;  /* Keepalive 
disabled by default. */
 static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
 static struct keepalive_info ka_info;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 /* Returns true if state update is allowed, false otherwise. */
 static bool
 ka_can_update_state(void)
@@ -284,6 +288,65 @@ ka_mark_pmd_thread_sleep(int tid)
 }
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info.proclist_mutex)
+{
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info.process_list) {
+char *state = NULL;
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, pinfo->core_id, pinfo->last_seen_time);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info.proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info.proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index cbc2387..2bae8f1 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -100,6 +100,7 @@ void ka_free_cached_threads(void);
 void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
+void get_ka_stats(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 2/8] dpif-netdev: Register packet processing cores to KA framework.

2017-12-08 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD threads to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD threads. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  79 ++
 lib/keepalive.c   | 194 --
 lib/keepalive.h   |  20 ++
 lib/ovs-thread.c  |   6 ++
 lib/ovs-thread.h  |   1 +
 lib/util.c|  22 +++
 lib/util.h|   1 +
 7 files changed, 318 insertions(+), 5 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07f6113..c978a76 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -49,6 +49,7 @@
 #include "flow.h"
 #include "hmapx.h"
 #include "id-pool.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -592,6 +593,7 @@ struct dp_netdev_pmd_thread {
 atomic_bool reload; /* Do we need to reload ports? */
 pthread_t thread;
 unsigned core_id;   /* CPU core id of this pmd thread. */
+pid_t tid;  /* PMD thread tid. */
 int numa_id;/* numa node id of this pmd thread. */
 bool isolated;
 
@@ -1018,6 +1020,72 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+uint64_t interval;
+
+interval = get_ka_interval();
+xnanosleep(interval * 1000 * 1000);
+}
+
+return NULL;
+}
+
+/* Kickstart 'ovs_keepalive' thread. */
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+/* Register the datapath threads. This gets invoked on every datapath
+ * reconfiguration. The pmd thread[s] having rxq[s] mapped will be
+ * registered to KA framework.
+ */
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+if (!ka_is_enabled()) {
+return;
+}
+
+ka_thread_start(dp);
+
+ka_reload_datapath_threads_begin();
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+/* Rxq mapping changes due to datapath reconfiguration.
+ * If no rxqs mapped to PMD now due to reconfiguration,
+ * unregister the pmd thread. */
+ka_unregister_thread(pmd->tid);
+continue;
+}
+
+ka_register_thread(pmd->tid);
+VLOG_INFO("Registered PMD thread [%d] on Core[%d] to KA framework",
+  pmd->tid, pmd->core_id);
+}
+}
+ka_cache_registered_threads();
+
+ka_reload_datapath_threads_end();
+}
+
 static void
 dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
   const char *argv[], void *aux OVS_UNUSED)
@@ -3819,6 +3887,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -4023,6 +4094,8 @@ pmd_thread_main(void *f_)
 
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
+/* Stores tid in to 'pmd->tid'. */
+ovsthread_set_tid(>tid);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
 dpdk_set_lcore_id(pmd->core_id);
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
@@ -4056,6 +4129,9 @@ reload:
   : PMD_CYCLES_IDLE);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive(pmd->tid);
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -4089,6 +4165,9 @@ reload:
 }
 
 emc_cache_uninit(>flow_cache);
+
+ka_unregister_thread(pmd->tid);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index ca8dccb..b04877f 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "lib/vswi

[ovs-dev] [PATCH RFC 5/5] dpif-netdev: Prefetch the cacheline having the cycle stats.

2017-12-04 Thread Bhanuprakash Bodireddy
Prefetch the cacheline having the cycle stats so that we can speed up
the cycles_count_start() and cycles_count_intermediate().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index b74b5d7..ab13d83 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -576,7 +576,7 @@ struct dp_netdev_pmd_thread {
 struct ovs_mutex flow_mutex;
 /* 8 pad bytes. */
 );
-PADDED_MEMBERS(CACHE_LINE_SIZE,
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cachelineC,
 struct cmap flow_table OVS_GUARDED; /* Flow table. */
 
 /* One classifier per in_port polled by the pmd */
@@ -4082,6 +4082,7 @@ reload:
 lc = UINT_MAX;
 }
 
+OVS_PREFETCH_CACHE(>cachelineC, OPCH_HTW);
 cycles_count_start(pmd);
 for (;;) {
 for (i = 0; i < poll_cnt; i++) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH RFC 4/5] util: Use OPCH_NTR type while prefetching packet metadata.

2017-12-04 Thread Bhanuprakash Bodireddy
OVS_PREFETCH by default uses OPCH_HTR(High Temporal Read), meaning
the prefetch is in preparation for a future read and the prefetched
data is made available in all levels of caches.

However the pkt_metadata_prefetch_init() prefetches the metadata so that
the data is readily available when pkt_metadata_init() zeroes out the
same. So a 'write' operation is actually performed instead of anticipated
'read' on the prefetched data.

Doing a 'write' isn't a problem here: the metadata isn't shared between
threads, so no invalidation across other cores is needed. A read prefetch
is therefore enough, and a non-temporal hint avoids polluting the cache.
This change seems to positively affect performance.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/packets.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/packets.h b/lib/packets.h
index 13ea46d..e422fc0 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -165,10 +165,10 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
 {
 /* Prefetch cacheline0 as members till ct_state and odp_port will
  * be initialized later in pkt_metadata_init(). */
-OVS_PREFETCH(md->cacheline0);
+OVS_PREFETCH_CACHE(md->cacheline0, OPCH_NTR);
 
 /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
-OVS_PREFETCH(md->cacheline2);
+OVS_PREFETCH_CACHE(md->cacheline2, OPCH_NTR);
 }
 
 bool dpid_from_string(const char *s, uint64_t *dpidp);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH RFC 3/5] util: Extend ovs_prefetch_range to include prefetch type.

2017-12-04 Thread Bhanuprakash Bodireddy
With ovs_prefetch_range(), large amounts of data can be prefetched into
caches. The prefetch type gives better control over the data caching
strategy, i.e. which cache levels (L1/L2/L3) the data should be prefetched
into and whether the data reference is temporal or non-temporal.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/pvector.h | 6 --
 lib/util.h| 4 ++--
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/lib/pvector.h b/lib/pvector.h
index b175b21..d5655f0 100644
--- a/lib/pvector.h
+++ b/lib/pvector.h
@@ -177,7 +177,8 @@ pvector_cursor_init(const struct pvector *pvec,
 
 impl = ovsrcu_get(struct pvector_impl *, >impl);
 
-ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0]);
+ovs_prefetch_range(impl->vector, impl->size * sizeof impl->vector[0],
+   OPCH_HTR);
 
 cursor.size = impl->size;
 cursor.vector = impl->vector;
@@ -208,7 +209,8 @@ static inline void pvector_cursor_lookahead(const struct 
pvector_cursor *cursor,
 int n, size_t size)
 {
 if (cursor->entry_idx + n < cursor->size) {
-ovs_prefetch_range(cursor->vector[cursor->entry_idx + n].ptr, size);
+ovs_prefetch_range(cursor->vector[cursor->entry_idx + n].ptr, size,
+   OPCH_HTR);
 }
 }
 
diff --git a/lib/util.h b/lib/util.h
index b01f421..f01ac7a 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -71,13 +71,13 @@ BUILD_ASSERT_DECL(IS_POW2(CACHE_LINE_SIZE));
 typedef uint8_t OVS_CACHE_LINE_MARKER[1];
 
 static inline void
-ovs_prefetch_range(const void *start, size_t size)
+ovs_prefetch_range(const void *start, size_t size, enum ovs_prefetch_type type)
 {
 const char *addr = (const char *)start;
 size_t ofs;
 
 for (ofs = 0; ofs < size; ofs += CACHE_LINE_SIZE) {
-OVS_PREFETCH(addr + ofs);
+OVS_PREFETCH_CACHE(addr + ofs, type);
 }
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH RFC 2/5] configure: Include -mprefetchwt1 explicitly.

2017-12-04 Thread Bhanuprakash Bodireddy
Processors support prefetch instructions in anticipation of a write, but
compilers (gcc) won't use them unless explicitly asked to do so, even
with '-march=native' specified.

[Problem]
  Case A:
OVS_PREFETCH_CACHE(addr, OPCH_HTW)
   __builtin_prefetch(addr, 1, 3)
 leaq-112(%rbp), %rax[Assembly]
 prefetchw  (%rax)

  Case B:
OVS_PREFETCH_CACHE(addr, OPCH_LTW)
   __builtin_prefetch(addr, 1, 1)
 leaq-112(%rbp), %rax[Assembly]
 prefetchw  (%rax) <***problem***>

  In spite of specifying -march=native and using Low Temporal Write (OPCH_LTW),
  the compiler generates the 'prefetchw' instruction instead of the
  'prefetchwt1' instruction available on the processor.

[Solution]
  Include -mprefetchwt1

  Case B:
OVS_PREFETCH_CACHE(addr, OPCH_LTW)
   __builtin_prefetch(addr, 1, 1)
 leaq-112(%rbp), %rax[Assembly]
 prefetchwt1  (%rax)

[Testing]
  $ ./boot.sh
  $ ./configure
 checking target hint for cgcc... x86_64
 checking whether gcc accepts -mprefetchwt1... yes
  $ make -j

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 configure.ac | 1 +
 1 file changed, 1 insertion(+)

diff --git a/configure.ac b/configure.ac
index 6a8113a..8f4fbe2 100644
--- a/configure.ac
+++ b/configure.ac
@@ -171,6 +171,7 @@ OVS_CONDITIONAL_CC_OPTION([-Wno-unused], [HAVE_WNO_UNUSED])
 OVS_CONDITIONAL_CC_OPTION([-Wno-unused-parameter], [HAVE_WNO_UNUSED_PARAMETER])
 OVS_ENABLE_WERROR
 OVS_ENABLE_SPARSE
+OVS_ENABLE_OPTION([-mprefetchwt1])
 OVS_CTAGS_IDENTIFIERS
 
 AC_ARG_VAR(KARCH, [Kernel Architecture String])
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH RFC 1/5] compiler: Introduce OVS_PREFETCH variants.

2017-12-04 Thread Bhanuprakash Bodireddy
This commit introduces prefetch variants by using the GCC built-in
prefetch function.

The prefetch variants give the user better control over the data
caching strategy in order to increase cache efficiency and minimize
cache pollution. Data reference patterns here can be classified into:

 - Non-temporal(NT) - Data that is referenced once and not reused in
  immediate future.
 - Temporal - Data will be used again soon.

The Macro variants can be used where there are
 - Predictable memory access patterns.
 - Execution pipeline can stall if data isn't available.
 - Time consuming loops.

For example:

  OVS_PREFETCH_CACHE(addr, OPCH_LTR)
- OPCH_LTR : OVS PREFETCH CACHE HINT-LOW TEMPORAL READ.
- __builtin_prefetch(addr, 0, 1)
- Prefetch data in to L3 cache for readonly purpose.

  OVS_PREFETCH_CACHE(addr, OPCH_HTW)
- OPCH_HTW : OVS PREFETCH CACHE HINT-HIGH TEMPORAL WRITE.
- __builtin_prefetch(addr, 1, 3)
- Prefetch data in to all caches in anticipation of write. In doing
  so it invalidates other cached copies so as to gain 'exclusive'
  access.

  OVS_PREFETCH(addr)
- OPCH_HTR : OVS PREFETCH CACHE HINT-HIGH TEMPORAL READ.
- __builtin_prefetch(addr, 0, 3)
- Prefetch data in to all caches in anticipation of read and that
  data will be used again soon (HTR - High Temporal Read).

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 include/openvswitch/compiler.h | 90 --
 1 file changed, 87 insertions(+), 3 deletions(-)

diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
index c7cb930..5d5553a 100644
--- a/include/openvswitch/compiler.h
+++ b/include/openvswitch/compiler.h
@@ -229,11 +229,95 @@
  * instruction as OVS_PREFETCH(), or bring the data into the cache in an
  * exclusive state. */
 #if __GNUC__
-#define OVS_PREFETCH(addr) __builtin_prefetch((addr))
-#define OVS_PREFETCH_WRITE(addr) __builtin_prefetch((addr), 1)
+enum cache_locality {
+NON_TEMPORAL_LOCALITY,
+LOW_TEMPORAL_LOCALITY,
+MODERATE_TEMPORAL_LOCALITY,
+HIGH_TEMPORAL_LOCALITY
+};
+
+enum cache_rw {
+PREFETCH_READ,
+PREFETCH_WRITE
+};
+
+/* Implementation details of prefetch hint instructions may vary across
+ * different processors and microarchitectures.
+ *
+ * OPCH_NTW, OPCH_LTW, OPCH_MTW uses prefetchwt1 instruction and OPCH_HTW
+ * uses prefetchw instruction when available.
+ * */
+#define OVS_PREFETCH_CACHE_HINT \
+OPCH(OPCH_NTR, PREFETCH_READ, NON_TEMPORAL_LOCALITY,\
+ "Fetch data to non-temporal cache to minimize cache pollution")\
+OPCH(OPCH_LTR, PREFETCH_READ, LOW_TEMPORAL_LOCALITY,\
+ "Fetch data to L2 and L3 cache")   \
+OPCH(OPCH_MTR, PREFETCH_READ, MODERATE_TEMPORAL_LOCALITY,   \
+ "Fetch data to L2 and L3 caches, same as LTR on"   \
+ "Nehalem, Westmere, Sandy Bridge and newer microarchitectures")\
+OPCH(OPCH_HTR, PREFETCH_READ, HIGH_TEMPORAL_LOCALITY,   \
+ "Fetch data in to all cache levels L1, L2 and L3") \
+OPCH(OPCH_NTW, PREFETCH_WRITE, NON_TEMPORAL_LOCALITY,   \
+ "Fetch data to L2, and L3 cache in exclusive state"\
+ "in anticipation of write")\
+OPCH(OPCH_LTW, PREFETCH_WRITE, LOW_TEMPORAL_LOCALITY,   \
+ "Fetch data to L2, and L3 cache in exclusive state")   \
+OPCH(OPCH_MTW, PREFETCH_WRITE, MODERATE_TEMPORAL_LOCALITY,  \
+ "Fetch data in to L2 and L3 caches in exclusive state")\
+OPCH(OPCH_HTW, PREFETCH_WRITE, HIGH_TEMPORAL_LOCALITY,  \
+ "Fetch data in to all cache levels in exclusive state")
+
+/* Indexes for cache prefetch types. */
+enum {
+#define OPCH(ENUM, RW, LOCALITY, EXPLANATION) ENUM##_INDEX,
+OVS_PREFETCH_CACHE_HINT
+#undef OPCH
+};
+
+/* Cache prefetch types. */
+enum ovs_prefetch_type {
+#define OPCH(ENUM, RW, LOCALITY, EXPLANATION) ENUM = 1 << ENUM##_INDEX,
+OVS_PREFETCH_CACHE_HINT
+#undef OPCH
+};
+
+#define OVS_PREFETCH_CACHE(addr, TYPE) switch(TYPE)   \
+{ \
+case OPCH_NTR:\
+__builtin_prefetch((addr), PREFETCH_READ, NON_TEMPORAL_LOCALITY); \
+break;\
+case OPCH_LTR:\
+__builtin_prefetch((addr), PREFETCH_READ, LOW_TEMPORAL_LOCA

[ovs-dev] [PATCH v2] packets: Prefetch the packet metadata in cacheline1.

2017-12-04 Thread Bhanuprakash Bodireddy
pkt_metadata_prefetch_init() is used to prefetch the packet metadata
before initializing the metadata in pkt_metadata_init(). This is done
for every packet in userspace datapath and is performance critical.

Commit 99fc16c0 prefetches only cacheline0 and cacheline2, as the metadata
part of the respective cachelines will be initialized by pkt_metadata_init().

However in VXLAN case when popping the vxlan header, netdev_vxlan_pop_header()
invokes pkt_metadata_init_tnl() which zeroes out metadata part of
cacheline1 that wasn't prefetched earlier and causes performance
degradation.

By prefetching cacheline1, 9% performance improvement is observed with
vxlan decapsulation test case for packet sizes of 118 bytes. Performance
variation is observed based on CFLAGS.

  CFLAGS="-O2"                  CFLAGS="-O2 -msse4.2"
  Master      4.667 Mpps        Master      4.710 Mpps
  With Patch  5.045 Mpps        With Patch  5.097 Mpps

  CFLAGS="-O2 -march=native"    CFLAGS="-Ofast -march=native"
  Master      5.072 Mpps        Master      5.349 Mpps
  With Patch  5.193 Mpps        With Patch  5.378 Mpps

CC: Ben Pfaff <b...@ovn.org>
Fixes: 99fc16c0 ("Reorganize the pkt_metadata structure.")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
v1 -> v2
 * Include the throughput stats with different CFLAG options.
 
 lib/packets.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/packets.h b/lib/packets.h
index 13ea46d..74bec5d 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -159,7 +159,8 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 }
 
 /* This function prefetches the cachelines touched by pkt_metadata_init()
- * For performance reasons the two functions should be kept in sync. */
+ * and pkt_metadata_init_tnl().  For performance reasons the two functions
+ * should be kept in sync. */
 static inline void
 pkt_metadata_prefetch_init(struct pkt_metadata *md)
 {
@@ -167,6 +168,10 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
  * be initialized later in pkt_metadata_init(). */
 OVS_PREFETCH(md->cacheline0);
 
+/* Prefetch cacheline1 as members of this cacheline will be zeroed out
+ * in pkt_metadata_init_tnl(). */
+OVS_PREFETCH(md->cacheline1);
+
 /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
 OVS_PREFETCH(md->cacheline2);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] util: Add high resolution sleep support.

2017-11-28 Thread Bhanuprakash Bodireddy
This commit introduces xnanosleep() for the threads needing high
resolution sleep timeouts.

usleep(), which provides microsecond granularity, is deprecated; threads
wanting sub-second (ms, us, ns) granularity can use this implementation.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
Acked-by: Alin Gabriel Serdean <aserd...@ovn.org>
---
v2 -> v3
  * Replace NSEC_PER_SEC Macro with (1000 * 1000 * 1000)

 lib/timeval.c | 19 +++
 lib/timeval.h |  1 +
 lib/util.c| 35 +++
 lib/util.h|  1 +
 4 files changed, 56 insertions(+)

diff --git a/lib/timeval.c b/lib/timeval.c
index b60bf30..193c7ba 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -514,6 +514,25 @@ msec_to_timespec(long long int ms, struct timespec *ts)
 ts->tv_nsec = (ms % 1000) * 1000 * 1000;
 }
 
+void
+nsec_to_timespec(long long int nsec, struct timespec *ts)
+{
+if (!nsec) {
+ts->tv_sec = ts->tv_nsec = 0;
+return;
+}
+ts->tv_sec = nsec / (1000 * 1000 * 1000);
+
+nsec = nsec % (1000 * 1000 * 1000);
+/* This is to handle dates before epoch. */
+if (OVS_UNLIKELY(nsec < 0)) {
+nsec += 1000 * 1000 * 1000;
+ts->tv_sec--;
+}
+
+ts->tv_nsec = nsec;
+}
+
 static void
 timewarp_work(void)
 {
diff --git a/lib/timeval.h b/lib/timeval.h
index c3dbb51..08d7a9e 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -73,6 +73,7 @@ size_t strftime_msec(char *s, size_t max, const char *format,
  const struct tm_msec *);
 void xgettimeofday(struct timeval *);
 void xclock_gettime(clock_t, struct timespec *);
+void nsec_to_timespec(long long int , struct timespec *);
 
 int get_cpu_usage(void);
 
diff --git a/lib/util.c b/lib/util.c
index 9e6edd2..62f5fa2 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2205,6 +2205,41 @@ xsleep(unsigned int seconds)
 ovsrcu_quiesce_end();
 }
 
+/* High resolution sleep. */
+void
+xnanosleep(uint64_t nanoseconds)
+{
+ovsrcu_quiesce_start();
+#ifndef _WIN32
+int retval;
+struct timespec ts_sleep;
+nsec_to_timespec(nanoseconds, _sleep);
+
+int error = 0;
+do {
+retval = nanosleep(_sleep, NULL);
+error = retval < 0 ? errno : 0;
+} while (error == EINTR);
+#else
+HANDLE timer = CreateWaitableTimer(NULL, FALSE, NULL);
+if (timer) {
+LARGE_INTEGER duetime;
+duetime.QuadPart = -nanoseconds;
+if (SetWaitableTimer(timer, , 0, NULL, NULL, FALSE)) {
+WaitForSingleObject(timer, INFINITE);
+} else {
+VLOG_ERR_ONCE("SetWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
+CloseHandle(timer);
+} else {
+VLOG_ERR_ONCE("CreateWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
+#endif
+ovsrcu_quiesce_end();
+}
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
  * to `auto`.
diff --git a/lib/util.h b/lib/util.h
index 3c43c2c..d355313 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -502,6 +502,7 @@ ovs_u128_and(const ovs_u128 a, const ovs_u128 b)
 }
 
 void xsleep(unsigned int seconds);
+void xnanosleep(uint64_t nanoseconds);
 
 bool is_stdout_a_tty(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] dpif-netdev: Allocate dp_netdev_pmd_thread struct by xzalloc_cacheline.

2017-11-26 Thread Bhanuprakash Bodireddy
All instances of struct dp_netdev_pmd_thread are allocated by xzalloc,
which doesn't guarantee that the memory is aligned on a
CACHE_LINE_SIZE boundary. Because of this, any padding done inside
the structure under that assumption might create holes.

This commit replaces xzalloc, free with xzalloc_cacheline and
free_cacheline. With the changes the memory is 64 byte aligned.

Before:
With xzalloc, all the memory is 16 byte aligned.

(gdb) p pmd
$1 = (struct dp_netdev_pmd_thread *) 0x7eff8a813010
(gdb) p &pmd->cacheline0
$2 = (OVS_CACHE_LINE_MARKER *) 0x7eff8a813010
(gdb) p &pmd->cacheline1
$3 = (OVS_CACHE_LINE_MARKER *) 0x7eff8a813050
(gdb) p &pmd->flow_cache
$4 = (struct emc_cache *) 0x7eff8a813090
(gdb) p &pmd->flow_table
$5 = (struct cmap *) 0x7eff8acb30d0
(gdb)  p &pmd->stats
$6 = (struct dp_netdev_pmd_stats *) 0x7eff8acb3110
(gdb) p &pmd->port_mutex
$7 = (struct ovs_mutex *) 0x7eff8acb3150
(gdb) p &pmd->poll_list
$8 = (struct hmap *) 0x7eff8acb3190
(gdb) p &pmd->tnl_port_cache
$9 = (struct hmap *) 0x7eff8acb31d0
(gdb) p &pmd->stats_zero
$10 = (unsigned long long (*)[5]) 0x7eff8acb3210

After:
With xzalloc_cacheline, all the memory is 64 byte aligned.

(gdb) p pmd
$1 = (struct dp_netdev_pmd_thread *) 0x7f39e2365040
(gdb) p &pmd->cacheline0
$2 = (OVS_CACHE_LINE_MARKER *) 0x7f39e2365040
(gdb) p &pmd->cacheline1
$3 = (OVS_CACHE_LINE_MARKER *) 0x7f39e2365080
(gdb) p &pmd->flow_cache
$4 = (struct emc_cache *) 0x7f39e23650c0
(gdb) p &pmd->flow_table
$5 = (struct cmap *) 0x7f39e2805100
(gdb) p &pmd->stats
$6 = (struct dp_netdev_pmd_stats *) 0x7f39e2805140
(gdb) p &pmd->port_mutex
$7 = (struct ovs_mutex *) 0x7f39e2805180
(gdb) p &pmd->poll_list
$8 = (struct hmap *) 0x7f39e28051c0
(gdb) p &pmd->tnl_port_cache
$9 = (struct hmap *) 0x7f39e2805200
(gdb) p &pmd->stats_zero
$10 = (unsigned long long (*)[5]) 0x7f39e2805240

Reported-by: Ilya Maximets <i.maxim...@samsung.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index db78318..3e281ae 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3646,7 +3646,7 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
 FOR_EACH_CORE_ON_DUMP(core, pmd_cores) {
 pmd = dp_netdev_get_pmd(dp, core->core_id);
 if (!pmd) {
-pmd = xzalloc(sizeof *pmd);
+pmd = xzalloc_cacheline(sizeof *pmd);
 dp_netdev_configure_pmd(pmd, dp, core->core_id, core->numa_id);
 pmd->thread = ovs_thread_create("pmd", pmd_thread_main, pmd);
 VLOG_INFO("PMD thread on numa_id: %d, core id: %2d created.",
@@ -4574,7 +4574,7 @@ dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd)
 xpthread_cond_destroy(>cond);
 ovs_mutex_destroy(>cond_mutex);
 ovs_mutex_destroy(>port_mutex);
-free(pmd);
+free_cacheline(pmd);
 }
 
 /* Stops the pmd thread, removes it from the 'dp->poll_threads',
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] packets: Prefetch the packet metadata in cacheline1.

2017-11-17 Thread Bhanuprakash Bodireddy
pkt_metadata_prefetch_init() is used to prefetch the packet metadata
before initializing the metadata in pkt_metadata_init(). This is done
for every packet in userspace datapath and is performance critical.

Commit 99fc16c0 prefetches only cacheline0 and cacheline2, as the metadata
part of the respective cachelines will be initialized by pkt_metadata_init().

However in VXLAN case when popping the vxlan header, netdev_vxlan_pop_header()
invokes pkt_metadata_init_tnl() which zeroes out metadata part of
cacheline1 that wasn't prefetched earlier and causes performance
degradation.

By prefetching cacheline1, 9% performance improvement is observed.

CC: Ben Pfaff <b...@ovn.org>
Fixes: 99fc16c0 ("Reorganize the pkt_metadata structure.")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/packets.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/lib/packets.h b/lib/packets.h
index 461f488..2e8c0f1 100644
--- a/lib/packets.h
+++ b/lib/packets.h
@@ -159,7 +159,8 @@ pkt_metadata_init(struct pkt_metadata *md, odp_port_t port)
 }
 
 /* This function prefetches the cachelines touched by pkt_metadata_init()
- * For performance reasons the two functions should be kept in sync. */
+ * and pkt_metadata_init_tnl().  For performance reasons the two functions
+ * should be kept in sync. */
 static inline void
 pkt_metadata_prefetch_init(struct pkt_metadata *md)
 {
@@ -167,6 +168,10 @@ pkt_metadata_prefetch_init(struct pkt_metadata *md)
  * be initialized later in pkt_metadata_init(). */
 OVS_PREFETCH(md->cacheline0);
 
+/* Prefetch cacheline1 as members of this cacheline will be zeroed out
+ * in pkt_metadata_init_tnl(). */
+OVS_PREFETCH(md->cacheline1);
+
 /* Prefetch cachline2 as ip_dst & ipv6_dst fields will be initialized. */
 OVS_PREFETCH(md->cacheline2);
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 2/2] util: Add high resolution sleep support.

2017-11-14 Thread Bhanuprakash Bodireddy
This commit introduces xnanosleep() for the threads needing high
resolution sleep timeouts.

usleep(), which provides microsecond granularity, is deprecated; threads
wanting sub-second (ms, us, ns) granularity can use this implementation.

CC: Aaron Conole <acon...@redhat.com>
CC: Alin Gabriel Serdean <aserd...@ovn.org>
CC: Ben Pfaff <b...@ovn.org>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c | 19 +++
 lib/timeval.h |  1 +
 lib/util.c| 35 +++
 lib/util.h|  1 +
 4 files changed, 56 insertions(+)

diff --git a/lib/timeval.c b/lib/timeval.c
index 567c26e..2fde90f 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -517,6 +517,25 @@ msec_to_timespec(long long int ms, struct timespec *ts)
 ts->tv_nsec = (ms % 1000) * NSEC_PER_MSEC;
 }
 
+void
+nsec_to_timespec(long long int nsec, struct timespec *ts)
+{
+if (!nsec) {
+ts->tv_sec = ts->tv_nsec = 0;
+return;
+}
+ts->tv_sec = nsec / NSEC_PER_SEC;
+
+nsec = nsec % NSEC_PER_SEC;
+/* This is to handle dates before epoch. */
+if (OVS_UNLIKELY(nsec < 0)) {
+nsec += NSEC_PER_SEC;
+ts->tv_sec--;
+}
+
+ts->tv_nsec = nsec;
+}
+
 static void
 timewarp_work(void)
 {
diff --git a/lib/timeval.h b/lib/timeval.h
index 5e2a731..095c334 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -80,6 +80,7 @@ size_t strftime_msec(char *s, size_t max, const char *format,
  const struct tm_msec *);
 void xgettimeofday(struct timeval *);
 void xclock_gettime(clock_t, struct timespec *);
+void nsec_to_timespec(long long int , struct timespec *);
 
 int get_cpu_usage(void);
 
diff --git a/lib/util.c b/lib/util.c
index 17c2c99..2c184d9 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2205,6 +2205,41 @@ xsleep(unsigned int seconds)
 ovsrcu_quiesce_end();
 }
 
+/* High resolution sleep. */
+void
+xnanosleep(uint64_t nanoseconds)
+{
+ovsrcu_quiesce_start();
+#ifndef _WIN32
+int retval;
+struct timespec ts_sleep;
+nsec_to_timespec(nanoseconds, _sleep);
+
+int error = 0;
+do {
+retval = nanosleep(_sleep, NULL);
+error = retval < 0 ? errno : 0;
+} while (error == EINTR);
+#else
+HANDLE timer = CreateWaitableTimer(NULL, FALSE, NULL);
+if (timer) {
+LARGE_INTEGER duetime;
+duetime.QuadPart = -nanoseconds;
+if (SetWaitableTimer(timer, , 0, NULL, NULL, FALSE)) {
+WaitForSingleObject(timer, INFINITE);
+} else {
+VLOG_ERR_ONCE("SetWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
+CloseHandle(timer);
+} else {
+VLOG_ERR_ONCE("CreateWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
+#endif
+ovsrcu_quiesce_end();
+}
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
  * to `auto`.
diff --git a/lib/util.h b/lib/util.h
index 3c43c2c..d355313 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -502,6 +502,7 @@ ovs_u128_and(const ovs_u128 a, const ovs_u128 b)
 }
 
 void xsleep(unsigned int seconds);
+void xnanosleep(uint64_t nanoseconds);
 
 bool is_stdout_a_tty(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/2] timeval: Introduce macros to convert timespec and timeval.

2017-11-14 Thread Bhanuprakash Bodireddy
This commit replaces the numbers with MSEC_PER_SEC, NSEC_PER_SEC and
USEC_PER_MSEC macros when dealing with timespec and timeval.

This commit doesn't change functionality.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c| 29 -
 lib/timeval.h|  7 +++
 lib/util.c   |  6 +++---
 ofproto/ofproto-dpif-ipfix.c |  2 +-
 4 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index b60bf30..567c26e 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -266,7 +266,7 @@ time_alarm(unsigned int secs)
 time_init();
 
 now = time_msec();
-msecs = secs * 1000LL;
+msecs = secs * MSEC_PER_SEC * 1LL;
 deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX;
 }
 
@@ -372,25 +372,28 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 long long int
 timespec_to_msec(const struct timespec *ts)
 {
-return (long long int) ts->tv_sec * 1000 + ts->tv_nsec / (1000 * 1000);
+return (long long int) ts->tv_sec * MSEC_PER_SEC +
+ts->tv_nsec / NSEC_PER_MSEC;
 }
 
 long long int
 timeval_to_msec(const struct timeval *tv)
 {
-return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000;
+return (long long int) tv->tv_sec * MSEC_PER_SEC +
+tv->tv_usec / USEC_PER_MSEC;
 }
 
 long long int
 timespec_to_usec(const struct timespec *ts)
 {
-return (long long int) ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000;
+return (long long int) ts->tv_sec * USEC_PER_SEC +
+ts->tv_nsec / NSEC_PER_USEC;
 }
 
 long long int
 timeval_to_usec(const struct timeval *tv)
 {
-return (long long int) tv->tv_sec * 1000 * 1000 + tv->tv_usec;
+return (long long int) tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
 }
 
 /* Returns the monotonic time at which the "time" module was initialized, in
@@ -510,8 +513,8 @@ xclock_gettime(clock_t id, struct timespec *ts)
 static void
 msec_to_timespec(long long int ms, struct timespec *ts)
 {
-ts->tv_sec = ms / 1000;
-ts->tv_nsec = (ms % 1000) * 1000 * 1000;
+ts->tv_sec = ms / MSEC_PER_SEC;
+ts->tv_nsec = (ms % 1000) * NSEC_PER_MSEC;
 }
 
 static void
@@ -596,8 +599,8 @@ timespec_add(struct timespec *sum,
 
 tmp.tv_sec = a->tv_sec + b->tv_sec;
 tmp.tv_nsec = a->tv_nsec + b->tv_nsec;
-if (tmp.tv_nsec >= 1000 * 1000 * 1000) {
-tmp.tv_nsec -= 1000 * 1000 * 1000;
+if (tmp.tv_nsec >= NSEC_PER_SEC) {
+tmp.tv_nsec -= NSEC_PER_SEC;
 tmp.tv_sec++;
 }
 
@@ -621,7 +624,7 @@ log_poll_interval(long long int last_wakeup)
 {
 long long int interval = time_msec() - last_wakeup;
 
-if (interval >= 1000 && !is_warped(_clock)) {
+if (interval >= MSEC_PER_SEC && !is_warped(_clock)) {
 const struct rusage *last_rusage = get_recent_rusage();
 struct rusage rusage;
 
@@ -713,7 +716,7 @@ refresh_rusage(void)
 
 if (!getrusage_thread(recent_rusage)) {
 long long int now = time_msec();
-if (now >= t->newer.when + 3 * 1000) {
+if (now >= t->newer.when + 3 * MSEC_PER_SEC) {
 t->older = t->newer;
 t->newer.when = now;
 t->newer.cpu = (timeval_to_msec(_rusage->ru_utime) +
@@ -837,7 +840,7 @@ strftime_msec(char *s, size_t max, const char *format,
 struct tm_msec *
 localtime_msec(long long int now, struct tm_msec *result)
 {
-  time_t now_sec = now / 1000;
+  time_t now_sec = now / MSEC_PER_SEC;
   localtime_r(_sec, >tm);
   result->msec = now % 1000;
   return result;
@@ -846,7 +849,7 @@ localtime_msec(long long int now, struct tm_msec *result)
 struct tm_msec *
 gmtime_msec(long long int now, struct tm_msec *result)
 {
-  time_t now_sec = now / 1000;
+  time_t now_sec = now / MSEC_PER_SEC;
   gmtime_r(_sec, >tm);
   result->msec = now % 1000;
   return result;
diff --git a/lib/timeval.h b/lib/timeval.h
index c3dbb51..5e2a731 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -40,6 +40,13 @@ BUILD_ASSERT_DECL(TYPE_IS_SIGNED(time_t));
 #define TIME_MAX TYPE_MAXIMUM(time_t)
 #define TIME_MIN TYPE_MINIMUM(time_t)
 
+#define MSEC_PER_SEC    1000L
+#define USEC_PER_SEC    1000000L
+#define NSEC_PER_SEC    1000000000L
+#define USEC_PER_MSEC   1000L
+#define NSEC_PER_MSEC   1000000L
+#define NSEC_PER_USEC   1000L
+
 #ifdef _WIN32
 #define localtime_r(timep, result) localtime_s(result, timep)
 #define gmtime_r(timep, result) gmtime_s(result, timep)
diff --git a/lib/util.c b/lib/util.c
index 9e6edd2..17c2c99 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -586,7 +586,7 @@ get_boot_time(void)
 char line[128];
 FILE *stream;
 
-cache_expiration = time_msec() + 5 * 1000;
+cache_expiration = time_msec() + 5 * MSEC_PER_SEC;
 
 stream = fopen(stat_fi

[ovs-dev] [PATCH 2/2] system-stats: Include core number in the process stats.

2017-11-08 Thread Bhanuprakash Bodireddy
When dumping process statistics, include the core number on which the
process was last scheduled. With 'other_config:enable-statistics=true',

Before:
  {cpu="28", file_systems="/,8474624,7826220 /workspace,223835956,199394160",
  load_average="1.29,1.76,1.33", memory="65861460,27457540,3813488,1999868,0",
  process_ovs-vswitchd="4685896,17452,362920,0,383967,383967",
  process_ovsdb-server="48088,5172,60,0,384057,384057"}

After:
  {cpu="28", file_systems="/,8474624,7826308 /workspace,223835956,199394172",
  load_average="1.30,1.04,1.13", memory="65861460,27469176,3815252,1999868,0",
  process_ovs-vswitchd="4686020,17360,127380,0,148406,148406,3",
  process_ovsdb-server="48096,5212,30,0,148496,148496,4"}

eg:
  processvsz   , rss , cputime, crashes, booted, uptime, core_id
  ovs-vswitchd="4686020,17360, 127380,  0  , 148406, 148406,  3"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 vswitchd/system-stats.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/vswitchd/system-stats.c b/vswitchd/system-stats.c
index b780476..aaab676 100644
--- a/vswitchd/system-stats.c
+++ b/vswitchd/system-stats.c
@@ -205,9 +205,10 @@ get_process_stats(struct smap *stats)
 (int) (extension - de->d_name), de->d_name);
 if (!smap_get(stats, key)) {
 if (LINUX && get_process_info(pid, )) {
-smap_add_format(stats, key, "%lu,%lu,%lld,%d,%lld,%lld",
+smap_add_format(stats, key, "%lu,%lu,%lld,%d,%lld,%lld,%d",
 pinfo.vsz, pinfo.rss, pinfo.cputime,
-pinfo.crashes, pinfo.booted, pinfo.uptime);
+pinfo.crashes, pinfo.booted, pinfo.uptime,
+pinfo.core_id);
 } else {
 smap_add(stats, key, "");
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] process: Extend get_process_info() for additional fields.

2017-11-08 Thread Bhanuprakash Bodireddy
This commit enables the fields relating to the process name and the core
number on which the process was last scheduled. The fields will be used by
the keepalive monitoring framework in future commits.

This commit also fixes the following "sparse" warning:

  lib/process.c:439:16: error: use of assignment suppression and length
  modifier together in gnu_scanf format [-Werror=format=].

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 43 +++
 lib/process.h |  2 ++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index dd678cd..78de4b8 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -64,7 +64,8 @@ struct raw_process_info {
 long long int uptime;   /* ms since started. */
 long long int cputime;  /* ms of CPU used during 'uptime'. */
 pid_t ppid; /* Parent. */
-char name[18];  /* Name (surrounded by parentheses). */
+int core_id;/* Core id last executed on. */
+char name[18];  /* Name. */
 };
 
 /* Pipe used to signal child termination. */
@@ -421,7 +422,7 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
 
 n = fscanf(stream,
"%*d "   /* (1. pid) */
-   "%17s "  /* 2. process name */
+   "(%17[^)]) " /* 2. process name */
"%*c "   /* (3. state) */
"%lu "   /* 4. ppid */
"%*d "   /* (5. pgid) */
@@ -444,33 +445,34 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
"%llu "  /* 22. start_time */
"%llu "  /* 23. vsize */
"%llu "  /* 24. rss */
+   "%*u "   /* (25. rsslim) */
+   "%*u "   /* (26. start_code) */
+   "%*u "   /* (27. end_code) */
+   "%*u "   /* (28. start_stack) */
+   "%*u "   /* (29. esp) */
+   "%*u "   /* (30. eip) */
+   "%*u "   /* (31. pending signals) */
+   "%*u "   /* (32. blocked signals) */
+   "%*u "   /* (33. ignored signals) */
+   "%*u "   /* (34. caught signals) */
+   "%*u "   /* (35. whcan) */
+   "%*u "   /* (36. always 0) */
+   "%*u "   /* (37. always 0) */
+   "%*d "   /* (38. exit_signal) */
+   "%d "/* 39. task_cpu */
 #if 0
/* These are here for documentation but #if'd out to save
 * actually parsing them from the stream for no benefit. */
-   "%*lu "  /* (25. rsslim) */
-   "%*lu "  /* (26. start_code) */
-   "%*lu "  /* (27. end_code) */
-   "%*lu "  /* (28. start_stack) */
-   "%*lu "  /* (29. esp) */
-   "%*lu "  /* (30. eip) */
-   "%*lu "  /* (31. pending signals) */
-   "%*lu "  /* (32. blocked signals) */
-   "%*lu "  /* (33. ignored signals) */
-   "%*lu "  /* (34. caught signals) */
-   "%*lu "  /* (35. whcan) */
-   "%*lu "  /* (36. always 0) */
-   "%*lu "  /* (37. always 0) */
-   "%*d "   /* (38. exit_signal) */
-   "%*d "   /* (39. task_cpu) */
"%*u "   /* (40. rt_priority) */
"%*u "   /* (41. policy) */
"%*llu " /* (42. blkio_ticks) */
"%*lu "  /* (43. gtime) */
"%*ld"   /* (44. cgtime) */
 #endif
-   , raw->name, , , , _time, , );
+   , raw->name, , , , _time,
+  , , >core_id);
 fclose(stream);
-if (n != 7) {
+if (n != 8) {
 VLOG_ERR_ONCE("%s: fscanf failed", file_name);
 return false;
 }
@@ -496,12 +498,14 @@ get_process_info(pid_t pid, struct process_info *pinfo)
 return false;
 }
 
+ovs_strlcpy(pinfo->name, child.name, sizeof pinfo->name);
 pinfo->vsz = child.vsz;
 pinfo->rss = child.rss;
 pinfo->booted = child.uptime;
 pinfo->crashes = 0;
 pinfo->uptime = child.uptime;
   

[ovs-dev] [PATCH 7/7] pcap-file: Use time macros.

2017-11-08 Thread Bhanuprakash Bodireddy
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/pcap-file.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/pcap-file.c b/lib/pcap-file.c
index 981d56d..60280fb 100644
--- a/lib/pcap-file.c
+++ b/lib/pcap-file.c
@@ -174,7 +174,7 @@ ovs_pcap_read(FILE *file, struct dp_packet **bufp, long 
long int *when)
 if (when) {
 uint32_t ts_sec = swap ? uint32_byteswap(prh.ts_sec) : prh.ts_sec;
 uint32_t ts_usec = swap ? uint32_byteswap(prh.ts_usec) : prh.ts_usec;
-*when = ts_sec * 1000LL + ts_usec / 1000;
+*when = ts_sec * MSEC_PER_SEC + ts_usec / USEC_PER_MSEC;
 }
 
 /* Read packet. Packet type is Ethernet */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/7] mac-learning: Use MSEC_PER_SEC Macro.

2017-11-08 Thread Bhanuprakash Bodireddy
Replace 1000 value with MSEC_PER_SEC macro when converting seconds to ms.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/mac-learning.c   | 2 +-
 lib/mcast-snooping.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/mac-learning.c b/lib/mac-learning.c
index 215caf0..166c80a 100644
--- a/lib/mac-learning.c
+++ b/lib/mac-learning.c
@@ -534,6 +534,6 @@ mac_learning_wait(struct mac_learning *ml)
 poll_immediate_wake();
 } else if (!ovs_list_is_empty(>lrus)) {
 struct mac_entry *e = mac_entry_from_lru_node(ml->lrus.next);
-poll_timer_wait_until(e->expires * 1000LL);
+poll_timer_wait_until(e->expires * MSEC_PER_SEC);
 }
 }
diff --git a/lib/mcast-snooping.c b/lib/mcast-snooping.c
index 6730301..c5bcb6c 100644
--- a/lib/mcast-snooping.c
+++ b/lib/mcast-snooping.c
@@ -916,12 +916,12 @@ mcast_snooping_wait__(struct mcast_snooping *ms)
 if (!ovs_list_is_empty(>group_lru)) {
 grp = mcast_group_from_lru_node(ms->group_lru.next);
 bundle = mcast_group_bundle_from_lru_node(grp->bundle_lru.next);
-msec = bundle->expires * 1000LL;
+msec = bundle->expires * MSEC_PER_SEC;
 }
 
 if (!ovs_list_is_empty(>mrouter_lru)) {
 mrouter = mcast_mrouter_from_lru_node(ms->mrouter_lru.next);
-mrouter_msec = mrouter->expires * 1000LL;
+mrouter_msec = mrouter->expires * MSEC_PER_SEC;
 msec = msec ? MIN(msec, mrouter_msec) : mrouter_msec;
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 5/7] ofproto: Use time macros.

2017-11-08 Thread Bhanuprakash Bodireddy
This commit replaces the numbers with appropriate macros. Also
use time_wall_usec() in ipfix_now().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/ofproto-dpif-ipfix.c | 12 ++--
 ofproto/ofproto-dpif-sflow.c |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index 4d16878..7afb60e 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -1673,8 +1673,8 @@ ipfix_cache_next_timeout_msec(const struct 
dpif_ipfix_exporter *exporter,
 
 LIST_FOR_EACH (entry, cache_flow_start_timestamp_list_node,
>cache_flow_start_timestamp_list) {
-*next_timeout_msec = entry->flow_start_timestamp_usec / 1000LL
-+ 1000LL * exporter->cache_active_timeout;
+*next_timeout_msec = entry->flow_start_timestamp_usec / USEC_PER_MSEC
++ USEC_PER_MSEC * exporter->cache_active_timeout;
 return true;
 }
 
@@ -1869,7 +1869,7 @@ ipfix_update_stats(struct dpif_ipfix_exporter *exporter,
 static uint64_t
 ipfix_now(void)
 {
-return time_wall_msec() * 1000ULL;
+return time_wall_usec();
 }
 
 /* Add an entry into a flow cache.  The entry is either aggregated into
@@ -2290,7 +2290,7 @@ ipfix_put_data_set(uint32_t export_time_sec,
 /* Calculate the negative deltas relative to the export time
  * in seconds sent in the header, not the exact export
  * time. */
-export_time_usec = 1000000LL * export_time_sec;
+export_time_usec = USEC_PER_SEC * export_time_sec;
 flow_start_delta_usec = export_time_usec
 - entry->flow_start_timestamp_usec;
 flow_end_delta_usec = export_time_usec
@@ -2620,7 +2620,7 @@ dpif_ipfix_cache_expire(struct dpif_ipfix_exporter 
*exporter,
 }
 
 max_flow_start_timestamp_usec = export_time_usec -
-1000000LL * exporter->cache_active_timeout;
+USEC_PER_SEC * exporter->cache_active_timeout;
 
 LIST_FOR_EACH_SAFE (entry, next_entry, 
cache_flow_start_timestamp_list_node,
 >cache_flow_start_timestamp_list) {
@@ -2672,7 +2672,7 @@ get_export_time_now(uint64_t *export_time_usec, uint32_t 
*export_time_sec)
 /* The IPFIX start and end deltas are negative deltas relative to
  * the export time, so set the export time 1 second off to
  * calculate those deltas. */
-*export_time_sec = DIV_ROUND_UP(*export_time_usec, 1000000);
+*export_time_sec = DIV_ROUND_UP(*export_time_usec, USEC_PER_SEC);
 }
 
 static void
diff --git a/ofproto/ofproto-dpif-sflow.c b/ofproto/ofproto-dpif-sflow.c
index ccf8964..ae33a84 100644
--- a/ofproto/ofproto-dpif-sflow.c
+++ b/ofproto/ofproto-dpif-sflow.c
@@ -1405,7 +1405,7 @@ dpif_sflow_wait(struct dpif_sflow *ds) OVS_EXCLUDED(mutex)
 {
 ovs_mutex_lock();
 if (ds->collectors != NULL) {
-poll_timer_wait_until(ds->next_tick * 1000LL);
+poll_timer_wait_until(ds->next_tick * MSEC_PER_SEC);
 }
 ovs_mutex_unlock();
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/7] util: Use MSEC_PER_SEC macro.

2017-11-08 Thread Bhanuprakash Bodireddy
Replace 1000 value with MSEC_PER_SEC macro when converting seconds to ms.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/util.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/util.c b/lib/util.c
index 46b5691..e2423fc 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -583,7 +583,7 @@ get_boot_time(void)
 char line[128];
 FILE *stream;
 
-cache_expiration = time_msec() + 5 * 1000;
+cache_expiration = time_msec() + 5 * MSEC_PER_SEC;
 
 stream = fopen(stat_file, "r");
 if (!stream) {
@@ -595,7 +595,7 @@ get_boot_time(void)
 while (fgets(line, sizeof line, stream)) {
 long long int btime;
 if (ovs_scan(line, "btime %lld", )) {
-boot_time = btime * 1000;
+boot_time = btime * MSEC_PER_SEC;
 goto done;
 }
 }
@@ -2195,7 +2195,7 @@ xsleep(unsigned int seconds)
 {
 ovsrcu_quiesce_start();
 #ifdef _WIN32
-Sleep(seconds * 1000);
+Sleep(seconds * MSEC_PER_SEC);
 #else
 sleep(seconds);
 #endif
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/7] util: High resolution sleep support for windows.

2017-11-08 Thread Bhanuprakash Bodireddy
This commit implements xnanosleep() for the threads needing high
resolution sleep timeouts in windows.

CC: Alin Gabriel Serdean <aserd...@ovn.org>
CC: Aaron Conole <acon...@redhat.com>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/util.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/lib/util.c b/lib/util.c
index a29e288..46b5691 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2217,6 +2217,23 @@ xnanosleep(uint64_t nanoseconds)
 retval = nanosleep(_sleep, NULL);
 error = retval < 0 ? errno : 0;
 } while (error == EINTR);
+#else
+HANDLE timer = CreateWaitableTimer(NULL, FALSE, "NSTIMER");
+if (timer) {
+LARGE_INTEGER duetime;
+duetime.QuadPart = -nanoseconds;
+if (SetWaitableTimer(timer, , 0, NULL, NULL, FALSE)) {
+WaitForSingleObject(timer, INFINITE);
+CloseHandle(timer);
+} else {
+CloseHandle(timer);
+VLOG_ERR_ONCE("SetWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
+} else {
+VLOG_ERR_ONCE("CreateWaitableTimer Failed (%s)",
+   ovs_lasterror_to_string());
+}
 #endif
 ovsrcu_quiesce_end();
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/7] util: Add high resolution sleep support.

2017-11-08 Thread Bhanuprakash Bodireddy
This commit introduces xnanosleep() for the threads needing high
resolution sleep timeouts.

usleep(), which provides microsecond granularity, is deprecated; all
threads that need ms, us or ns granularity can use this implementation.

CC: Ben Pfaff <b...@ovn.org>
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c | 19 +++
 lib/timeval.h |  1 +
 lib/util.c| 19 +++
 lib/util.h|  1 +
 4 files changed, 40 insertions(+)

diff --git a/lib/timeval.c b/lib/timeval.c
index 8875fed..93eeb72 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -517,6 +517,25 @@ msec_to_timespec(long long int ms, struct timespec *ts)
 ts->tv_nsec = (ms % 1000) * NSEC_PER_MSEC;
 }
 
+void
+nsec_to_timespec(long long int nsec, struct timespec *ts)
+{
+if (!nsec) {
+ts->tv_sec = ts->tv_nsec = 0;
+return;
+}
+ts->tv_sec = nsec / NSEC_PER_SEC;
+
+nsec = nsec % NSEC_PER_SEC;
+/* This is to handle dates before epoch. */
+if (OVS_UNLIKELY(nsec < 0)) {
+nsec += NSEC_PER_SEC;
+ts->tv_sec--;
+}
+
+ts->tv_nsec = nsec;
+}
+
 static void
 timewarp_work(void)
 {
diff --git a/lib/timeval.h b/lib/timeval.h
index 5e2a731..095c334 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -80,6 +80,7 @@ size_t strftime_msec(char *s, size_t max, const char *format,
  const struct tm_msec *);
 void xgettimeofday(struct timeval *);
 void xclock_gettime(clock_t, struct timespec *);
+void nsec_to_timespec(long long int , struct timespec *);
 
 int get_cpu_usage(void);
 
diff --git a/lib/util.c b/lib/util.c
index a26cd51..a29e288 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2202,6 +2202,25 @@ xsleep(unsigned int seconds)
 ovsrcu_quiesce_end();
 }
 
+/* High resolution sleep. */
+void
+xnanosleep(uint64_t nanoseconds)
+{
+ovsrcu_quiesce_start();
+#ifndef _WIN32
+int retval;
+struct timespec ts_sleep;
+nsec_to_timespec(nanoseconds, _sleep);
+
+int error = 0;
+do {
+retval = nanosleep(_sleep, NULL);
+error = retval < 0 ? errno : 0;
+} while (error == EINTR);
+#endif
+ovsrcu_quiesce_end();
+}
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
  * to `auto`.
diff --git a/lib/util.h b/lib/util.h
index 3c43c2c..d355313 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -502,6 +502,7 @@ ovs_u128_and(const ovs_u128 a, const ovs_u128 b)
 }
 
 void xsleep(unsigned int seconds);
+void xnanosleep(uint64_t nanoseconds);
 
 bool is_stdout_a_tty(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/7] timeval: Introduce macros to convert timespec and timeval.

2017-11-08 Thread Bhanuprakash Bodireddy
This commit replaces the numbers with MSEC_PER_SEC, NSEC_PER_SEC and
USEC_PER_MSEC macros when dealing with timespec and timeval.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c | 29 -
 lib/timeval.h |  7 +++
 2 files changed, 23 insertions(+), 13 deletions(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index b60bf30..8875fed 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -266,7 +266,7 @@ time_alarm(unsigned int secs)
 time_init();
 
 now = time_msec();
-msecs = secs * 1000LL;
+msecs = secs * MSEC_PER_SEC;
 deadline = now < LLONG_MAX - msecs ? now + msecs : LLONG_MAX;
 }
 
@@ -372,25 +372,28 @@ time_poll(struct pollfd *pollfds, int n_pollfds, HANDLE 
*handles OVS_UNUSED,
 long long int
 timespec_to_msec(const struct timespec *ts)
 {
-return (long long int) ts->tv_sec * 1000 + ts->tv_nsec / (1000 * 1000);
+return (long long int) ts->tv_sec * MSEC_PER_SEC +
+ts->tv_nsec / NSEC_PER_MSEC;
 }
 
 long long int
 timeval_to_msec(const struct timeval *tv)
 {
-return (long long int) tv->tv_sec * 1000 + tv->tv_usec / 1000;
+return (long long int) tv->tv_sec * MSEC_PER_SEC +
+tv->tv_usec / USEC_PER_MSEC;
 }
 
 long long int
 timespec_to_usec(const struct timespec *ts)
 {
-return (long long int) ts->tv_sec * 1000 * 1000 + ts->tv_nsec / 1000;
+return (long long int) ts->tv_sec * USEC_PER_SEC +
+ts->tv_nsec / NSEC_PER_USEC;
 }
 
 long long int
 timeval_to_usec(const struct timeval *tv)
 {
-return (long long int) tv->tv_sec * 1000 * 1000 + tv->tv_usec;
+return (long long int) tv->tv_sec * USEC_PER_SEC + tv->tv_usec;
 }
 
 /* Returns the monotonic time at which the "time" module was initialized, in
@@ -510,8 +513,8 @@ xclock_gettime(clock_t id, struct timespec *ts)
 static void
 msec_to_timespec(long long int ms, struct timespec *ts)
 {
-ts->tv_sec = ms / 1000;
-ts->tv_nsec = (ms % 1000) * 1000 * 1000;
+ts->tv_sec = ms / MSEC_PER_SEC;
+ts->tv_nsec = (ms % 1000) * NSEC_PER_MSEC;
 }
 
 static void
@@ -596,8 +599,8 @@ timespec_add(struct timespec *sum,
 
 tmp.tv_sec = a->tv_sec + b->tv_sec;
 tmp.tv_nsec = a->tv_nsec + b->tv_nsec;
-if (tmp.tv_nsec >= 1000 * 1000 * 1000) {
-tmp.tv_nsec -= 1000 * 1000 * 1000;
+if (tmp.tv_nsec >= NSEC_PER_SEC) {
+tmp.tv_nsec -= NSEC_PER_SEC;
 tmp.tv_sec++;
 }
 
@@ -621,7 +624,7 @@ log_poll_interval(long long int last_wakeup)
 {
 long long int interval = time_msec() - last_wakeup;
 
-if (interval >= 1000 && !is_warped(_clock)) {
+if (interval >= MSEC_PER_SEC && !is_warped(_clock)) {
 const struct rusage *last_rusage = get_recent_rusage();
 struct rusage rusage;
 
@@ -713,7 +716,7 @@ refresh_rusage(void)
 
 if (!getrusage_thread(recent_rusage)) {
 long long int now = time_msec();
-if (now >= t->newer.when + 3 * 1000) {
+if (now >= t->newer.when + 3 * MSEC_PER_SEC) {
 t->older = t->newer;
 t->newer.when = now;
 t->newer.cpu = (timeval_to_msec(_rusage->ru_utime) +
@@ -837,7 +840,7 @@ strftime_msec(char *s, size_t max, const char *format,
 struct tm_msec *
 localtime_msec(long long int now, struct tm_msec *result)
 {
-  time_t now_sec = now / 1000;
+  time_t now_sec = now / MSEC_PER_SEC;
   localtime_r(&now_sec, &result->tm);
   result->msec = now % 1000;
   return result;
@@ -846,7 +849,7 @@ localtime_msec(long long int now, struct tm_msec *result)
 struct tm_msec *
 gmtime_msec(long long int now, struct tm_msec *result)
 {
-  time_t now_sec = now / 1000;
+  time_t now_sec = now / MSEC_PER_SEC;
   gmtime_r(&now_sec, &result->tm);
   result->msec = now % 1000;
   return result;
diff --git a/lib/timeval.h b/lib/timeval.h
index c3dbb51..5e2a731 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -40,6 +40,13 @@ BUILD_ASSERT_DECL(TYPE_IS_SIGNED(time_t));
 #define TIME_MAX TYPE_MAXIMUM(time_t)
 #define TIME_MIN TYPE_MINIMUM(time_t)
 
+#define MSEC_PER_SEC    1000L
+#define USEC_PER_SEC    1000000L
+#define NSEC_PER_SEC    1000000000L
+#define USEC_PER_MSEC   1000L
+#define NSEC_PER_MSEC   1000000L
+#define NSEC_PER_USEC   1000L
+
 #ifdef _WIN32
 #define localtime_r(timep, result) localtime_s(result, timep)
 #define gmtime_r(timep, result) gmtime_s(result, timep)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/7] Introduce high resolution sleep support.

2017-11-08 Thread Bhanuprakash Bodireddy
This patchset introduces high-resolution sleep support for Linux and Windows.
It also introduces time macros to replace magic numbers with meaningful
names.

Bhanuprakash Bodireddy (7):
  timeval: Introduce macros to convert timespec and timeval.
  util: Add high resolution sleep support.
  util: High resolution sleep support for windows.
  util: Use MSEC_PER_SEC macro.
  ofproto: Use time macros.
  mac-learning: Use MSEC_PER_SEC Macro.
  pcap-file: Use time macros.

 lib/mac-learning.c   |  2 +-
 lib/mcast-snooping.c |  4 ++--
 lib/pcap-file.c  |  2 +-
 lib/timeval.c| 48 
 lib/timeval.h|  8 
 lib/util.c   | 42 +++---
 lib/util.h   |  1 +
 ofproto/ofproto-dpif-ipfix.c | 12 +--
 ofproto/ofproto-dpif-sflow.c |  2 +-
 9 files changed, 94 insertions(+), 27 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 7/7] ovsdb: Remove break after OVS_NOT_REACHED.

2017-10-01 Thread Bhanuprakash Bodireddy
The break statement would never be executed as OVS_NOT_REACHED()
internally invokes abort() and causes process termination.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ovsdb/replication.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ovsdb/replication.c b/ovsdb/replication.c
index 304212d..8dd1181 100644
--- a/ovsdb/replication.c
+++ b/ovsdb/replication.c
@@ -874,7 +874,6 @@ replication_status(void)
 break;
 default:
 OVS_NOT_REACHED();
-break;
 }
 } else {
 ds_put_format(, "not connected to %s", sync_from);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/7] netdev-dpdk: Refactor netdev_dpdk structure.

2017-10-01 Thread Bhanuprakash Bodireddy
This commit introduces the following changes to the netdev_dpdk structure.

- Mark cachelines and reorder few member variables.
- Maintain the grouping of related member variables.
- Add comments noting the pad bytes wherever appropriate, so
  new members can be introduced in the future to fill the gaps.

  Below is how this structure looks with this commit.

  Member                                 size

 OVS_CACHE_LINE_MARKER cacheline0;
 dpdk_port_t port_id;                      1
 bool attached;                            1
 ...

 OVS_CACHE_LINE_MARKER cacheline1;
 struct ovs_mutex mutex;                  48
 struct dpdk_mp *dpdk_mp;                  8
 ...

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 158 +-
 1 file changed, 84 insertions(+), 74 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index c60f46f..d999807 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -350,80 +350,90 @@ enum dpdk_hw_ol_features {
 };
 
 struct netdev_dpdk {
-struct netdev up;
-dpdk_port_t port_id;
-int max_packet_len;
-enum dpdk_dev_type type;
-
-struct dpdk_tx_queue *tx_q;
-
-struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);
-
-struct dpdk_mp *dpdk_mp;
-int mtu;
-int socket_id;
-int buf_size;
-struct netdev_stats stats;
-/* Protects stats */
-rte_spinlock_t stats_lock;
-
-struct eth_addr hwaddr;
-enum netdev_flags flags;
-
-struct rte_eth_link link;
-int link_reset_cnt;
-
-/* virtio identifier for vhost devices */
-ovsrcu_index vid;
-
-/* True if vHost device is 'up' and has been reconfigured at least once */
-bool vhost_reconfigured;
-
-/* Identifier used to distinguish vhost devices from each other. */
-char vhost_id[PATH_MAX];
-
-/* Device arguments for dpdk ports */
-char *devargs;
-
-/* If true, device was attached by rte_eth_dev_attach(). */
-bool attached;
-
-/* In dpdk_list. */
-struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
-
-/* QoS configuration and lock for the device */
-OVSRCU_TYPE(struct qos_conf *) qos_conf;
-
-/* The following properties cannot be changed when a device is running,
- * so we remember the request and update them next time
- * netdev_dpdk*_reconfigure() is called */
-int requested_mtu;
-int requested_n_txq;
-int requested_n_rxq;
-int requested_rxq_size;
-int requested_txq_size;
-
-/* Number of rx/tx descriptors for physical devices */
-int rxq_size;
-int txq_size;
-
-/* Socket ID detected when vHost device is brought up */
-int requested_socket_id;
-
-/* Denotes whether vHost port is client/server mode */
-uint64_t vhost_driver_flags;
-
-/* Ingress Policer */
-OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
-uint32_t policer_rate;
-uint32_t policer_burst;
-
-/* DPDK-ETH Flow control */
-struct rte_eth_fc_conf fc_conf;
-
-/* DPDK-ETH hardware offload features,
- * from the enum set 'dpdk_hw_ol_features' */
-uint32_t hw_ol_features;
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+dpdk_port_t port_id;
+
+/* If true, device was attached by rte_eth_dev_attach(). */
+bool attached;
+struct eth_addr hwaddr;
+int mtu;
+int socket_id;
+int buf_size;
+int max_packet_len;
+enum dpdk_dev_type type;
+enum netdev_flags flags;
+char *devargs;  /* Device arguments for dpdk ports */
+struct dpdk_tx_queue *tx_q;
+struct rte_eth_link link;
+int link_reset_cnt;
+/* 4 pad bytes here. */
+);
+
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
+struct ovs_mutex mutex OVS_ACQ_AFTER(dpdk_mutex);
+struct dpdk_mp *dpdk_mp;
+
+/* virtio identifier for vhost devices */
+ovsrcu_index vid;
+
+/* True if vHost device is 'up' and has been reconfigured at least 
once */
+bool vhost_reconfigured;
+/* 3 pad bytes here. */
+);
+
+PADDED_MEMBERS(CACHE_LINE_SIZE,
+/* Identifier used to distinguish vhost devices from each other. */
+char vhost_id[PATH_MAX];
+);
+
+PADDED_MEMBERS(CACHE_LINE_SIZE,
+struct netdev up;
+/* In dpdk_list. */
+struct ovs_list list_node OVS_GUARDED_BY(dpdk_mutex);
+
+/* QoS configuration and lock for the device */
+OVSRCU_TYPE(struct qos_conf *) qos_conf;
+
+/* Ingress Policer */
+OVSRCU_TYPE(struct ingress_policer *) ingress_policer;
+uint32_t policer_rate;
+uint32_t policer_burst;
+);
+
+PADDED_MEMBERS(CACHE_LINE_SIZE,
+struct netdev_stats stats;
+/* Protects stats */
+rte_spinlock_t stats_lock;
+/* 44 pad byte

[ovs-dev] [PATCH 5/7] ofproto: Remove break after OVS_NOT_REACHED.

2017-10-01 Thread Bhanuprakash Bodireddy
The break statement would never be executed as OVS_NOT_REACHED()
internally invokes abort() and causes process termination.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/ofproto-dpif-ipfix.c | 1 -
 ofproto/ofproto-dpif-xlate.c | 2 --
 2 files changed, 3 deletions(-)

diff --git a/ofproto/ofproto-dpif-ipfix.c b/ofproto/ofproto-dpif-ipfix.c
index 472c272..538faff 100644
--- a/ofproto/ofproto-dpif-ipfix.c
+++ b/ofproto/ofproto-dpif-ipfix.c
@@ -1314,7 +1314,6 @@ ipfix_def_options_template_fields(enum 
ipfix_options_template opt_tmpl_type,
 case NUM_IPFIX_OPTIONS_TEMPLATE:
 default:
 OVS_NOT_REACHED();
-break;
 }
 
 return 0;
diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index d320d57..0a71855 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -3229,7 +3229,6 @@ propagate_tunnel_data_to_flow(struct xlate_ctx *ctx, 
struct eth_addr dmac,
 case __OVS_VPORT_TYPE_MAX:
 default:
 OVS_NOT_REACHED();
-break;
 }
 /*
  * Update base_flow first followed by flow as the dst_flow gets modified
@@ -5951,7 +5950,6 @@ xlate_generic_encap_action(struct xlate_ctx *ctx,
 default:
 /* New packet type was checked during decoding. */
 OVS_NOT_REACHED();
-break;
 }
 
 if (!ctx->error) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 6/7] ofp-actions: Remove break after OVS_NOT_REACHED.

2017-10-01 Thread Bhanuprakash Bodireddy
The return statement would never be executed as OVS_NOT_REACHED()
internally invokes abort() and causes process termination.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/ofp-actions.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lib/ofp-actions.c b/lib/ofp-actions.c
index 71eb70c..432b469 100644
--- a/lib/ofp-actions.c
+++ b/lib/ofp-actions.c
@@ -5482,7 +5482,6 @@ parse_UNROLL_XLATE(char *arg OVS_UNUSED,
enum ofputil_protocol *usable_protocols OVS_UNUSED)
 {
 OVS_NOT_REACHED();
-return NULL;
 }
 
 static void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/7] dpif_netdev: Refactor dp_netdev_pmd_thread structure.

2017-10-01 Thread Bhanuprakash Bodireddy
This commit introduces the following changes to the dp_netdev_pmd_thread
structure.

- Mark cachelines and in this process reorder few members to avoid
  holes.
- Align emc_cache to a cacheline.
- Maintain the grouping of related member variables.
- Add comments noting the pad bytes wherever appropriate so
  that new member variables may be introduced to fill the holes in future.

  Below is how the structure looks with this commit.

  Member                                    size

 OVS_CACHE_LINE_MARKER cacheline0;
 struct dp_netdev *dp;                        8
 struct cmap_node node;                       8
 pthread_cond_t cond;                        48

 OVS_CACHE_LINE_MARKER cacheline1;
 struct ovs_mutex cond_mutex;                48
 pthread_t thread;                            8
 unsigned int core_id;                        4
 int numa_id;                                 4

 OVS_CACHE_LINE_MARKER cacheline2;
 struct emc_cache flow_cache;           4849672

 ### cachelineX: 64 bytes, 0 pad bytes ###
 struct cmap flow_table;                      8
 

 ### cachelineY: 59 bytes, 5 pad bytes ###
 struct dp_netdev_pmd_stats stats;           40
 

 ### cachelineZ: 48 bytes, 16 pad bytes ###
 struct ovs_mutex port_mutex;                48
 

This change also improves the performance marginally.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 160 +++---
 1 file changed, 91 insertions(+), 69 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index d5eb830..4cd0edf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -547,18 +547,31 @@ struct tx_port {
  * actions in either case.
  * */
 struct dp_netdev_pmd_thread {
-struct dp_netdev *dp;
-struct ovs_refcount ref_cnt;/* Every reference must be refcount'ed. */
-struct cmap_node node;  /* In 'dp->poll_threads'. */
-
-pthread_cond_t cond;/* For synchronizing pmd thread reload. */
-struct ovs_mutex cond_mutex;/* Mutex for condition variable. */
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+struct dp_netdev *dp;
+struct cmap_node node;  /* In 'dp->poll_threads'. */
+pthread_cond_t cond;/* For synchronizing pmd thread
+   reload. */
+);
+
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
+struct ovs_mutex cond_mutex;/* Mutex for condition variable. */
+pthread_t thread;
+unsigned core_id;   /* CPU core id of this pmd thread. */
+int numa_id;/* numa node id of this pmd thread. */
+);
 
 /* Per thread exact-match cache.  Note, the instance for cpu core
  * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
  * need to be protected by 'non_pmd_mutex'.  Every other instance
  * will only be accessed by its own pmd thread. */
-struct emc_cache flow_cache;
+OVS_ALIGNED_VAR(CACHE_LINE_SIZE) struct emc_cache flow_cache;
+struct ovs_refcount ref_cnt;/* Every reference must be refcount'ed. */
+
+/* Queue id used by this pmd thread to send packets on all netdevs if
+ * XPS disabled for this netdev. All static_tx_qid's are unique and less
+ * than 'cmap_count(dp->poll_threads)'. */
+uint32_t static_tx_qid;
 
 /* Flow-Table and classifiers
  *
@@ -567,68 +580,77 @@ struct dp_netdev_pmd_thread {
  * 'flow_mutex'.
  */
 struct ovs_mutex flow_mutex;
-struct cmap flow_table OVS_GUARDED; /* Flow table. */
-
-/* One classifier per in_port polled by the pmd */
-struct cmap classifiers;
-/* Periodically sort subtable vectors according to hit frequencies */
-long long int next_optimization;
-/* End of the next time interval for which processing cycles
-   are stored for each polled rxq. */
-long long int rxq_interval;
-
-/* Statistics. */
-struct dp_netdev_pmd_stats stats;
-
-/* Cycles counters */
-struct dp_netdev_pmd_cycles cycles;
-
-/* Used to count cicles. See 'cycles_counter_end()' */
-unsigned long long last_cycles;
-
-struct latch exit_latch;/* For terminating the pmd thread. */
-struct seq *reload_seq;
-uint64_t last_reload_seq;
-atomic_bool reload; /* Do we need to reload ports? */
-pthread_t thread;
-unsigned core_id;   /* CPU core id of this pmd thread. */
-int numa_id;/* numa node id of this pmd thread. */
-bool isolated;
-
-/* Queue id used by this pmd thread to send packets on all netdevs if
- * XPS disabled for this netdev. All static_tx_qid's are unique and less
- * than 'cmap_count(dp->poll_threads)'. */
-uint32_t static_tx_qid;
-
-struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. */
-/* List of rx queues to poll. */
-str

[ovs-dev] [PATCH 2/7] cmap: Use PADDED_MEMBERS_CACHELINE_MARKER in cmap_impl.

2017-10-01 Thread Bhanuprakash Bodireddy
Instead of explicitly adding the pad bytes to force the structure to be an
exact multiple of the cacheline size, let the macro do the job. This way
the pad bytes will be adjusted automatically when new members get introduced
into the structure.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/cmap.c | 19 ---
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/lib/cmap.c b/lib/cmap.c
index 8c7312d..35decea 100644
--- a/lib/cmap.c
+++ b/lib/cmap.c
@@ -165,13 +165,18 @@ BUILD_ASSERT_DECL(sizeof(struct cmap_bucket) == 
CACHE_LINE_SIZE);
 
 /* The implementation of a concurrent hash map. */
 struct cmap_impl {
-unsigned int n; /* Number of in-use elements. */
-unsigned int max_n; /* Max elements before enlarging. */
-unsigned int min_n; /* Min elements before shrinking. */
-uint32_t mask;  /* Number of 'buckets', minus one. */
-uint32_t basis; /* Basis for rehashing client's hash values. */
-uint8_t pad[CACHE_LINE_SIZE - 4 * 5]; /* Pad to end of cache line. */
-struct cmap_bucket buckets[1];
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline0,
+unsigned int n; /* Number of in-use elements. */
+unsigned int max_n; /* Max elements before enlarging. */
+unsigned int min_n; /* Min elements before shrinking. */
+uint32_t mask;  /* Number of 'buckets', minus one. */
+uint32_t basis; /* Basis for rehashing client's
+   hash values. */
+);
+
+PADDED_MEMBERS_CACHELINE_MARKER(CACHE_LINE_SIZE, cacheline1,
+struct cmap_bucket buckets[1];
+);
 };
 BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE * 2);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/7] ccmap: Use PADDED_MEMBERS macro in ccmap_impl structure.

2017-10-01 Thread Bhanuprakash Bodireddy
Instead of explicitly adding the pad bytes to force the structure to be an
exact multiple of the cacheline size, let the PADDED_MEMBERS macro do the job.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/ccmap.c | 19 +--
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/lib/ccmap.c b/lib/ccmap.c
index 08359b5..a460833 100644
--- a/lib/ccmap.c
+++ b/lib/ccmap.c
@@ -102,16 +102,15 @@ BUILD_ASSERT_DECL(sizeof(struct ccmap_bucket) == 
CACHE_LINE_SIZE);
 
 /* The implementation of a concurrent hash map. */
 struct ccmap_impl {
-unsigned int n_unique;  /* Number of in-use nodes. */
-unsigned int n; /* Number of hashes inserted. */
-unsigned int max_n; /* Max nodes before enlarging. */
-unsigned int min_n; /* Min nodes before shrinking. */
-uint32_t mask;  /* Number of 'buckets', minus one. */
-uint32_t basis; /* Basis for rehashing client's hash values. */
-
-/* Padding to make ccmap_impl exactly one cache line long. */
-uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 6];
-
+PADDED_MEMBERS(CACHE_LINE_SIZE,
+unsigned int n_unique;  /* Number of in-use nodes. */
+unsigned int n; /* Number of hashes inserted. */
+unsigned int max_n; /* Max nodes before enlarging. */
+unsigned int min_n; /* Min nodes before shrinking. */
+uint32_t mask;  /* Number of 'buckets', minus one. */
+uint32_t basis; /* Basis for rehashing client's
+   hash values. */
+);
 struct ccmap_bucket buckets[];
 };
 BUILD_ASSERT_DECL(sizeof(struct ccmap_impl) == CACHE_LINE_SIZE);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 11/12] dpif-netdev: Remove 'cnt' in dp_netdev_input__().

2017-09-20 Thread Bhanuprakash Bodireddy
There is little use for the 'cnt' variable in dp_netdev_input__(). Get rid of
it and use dp_packet_batch_size() to initialize PKT_ARRAY_SIZE.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07fca44..35b7a64 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5100,9 +5100,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets,
   bool md_is_valid, odp_port_t port_no)
 {
-int cnt = packets->count;
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = cnt;
+const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 10/12] odp-execute: Use const qualifer for batch size.

2017-09-20 Thread Bhanuprakash Bodireddy
It is recommended to use the const qualifier for 'num', which tracks the
packet batch count. This way 'num' can't be modified by the iterator.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/odp-execute.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5f4d23a..3109f39 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -826,7 +826,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 break;
 }
 case OVS_ACTION_ATTR_DECAP_NSH: {
-size_t i, num = batch->count;
+size_t i;
+const size_t num = dp_packet_batch_size(batch);
 
 DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
 if (decap_nsh(packet)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 12/12] dpif-netdev: Fix comments for pmd_load_cached_ports.

2017-09-20 Thread Bhanuprakash Bodireddy
Commit 57eebbb4c315 replaces thread local 'pmd->port_cache' with
'pmd->tnl_port_cache' and 'pmd->send_port_cache' maps. Update the
comments accordingly.

Fixes: 57eebbb4c315 ("Don't try to output on a device without txqs")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 35b7a64..1b5420e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3933,7 +3933,9 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
 }
 
 /* Copies ports from 'pmd->tx_ports' (shared with the main thread) to
- * 'pmd->port_cache' (thread local) */
+ * thread-local copies. Copy to 'pmd->tnl_port_cache' if it is a tunnel
+ * device, otherwise to 'pmd->send_port_cache' if the port has atleast
+ * one txq. */
 static void
 pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
 OVS_REQUIRES(pmd->port_mutex)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 09/12] netdev-dpdk: Minor cleanup of netdev_dpdk_send__.

2017-09-20 Thread Bhanuprakash Bodireddy
The variable 'cnt' is initialized and then reused across multiple function
calls inside netdev_dpdk_send__(), which can be confusing. Instead introduce
'batch_cnt' to hold the original packet count and 'tx_cnt' to store
the final packet count resulting after filtering and QoS operations.

Finally, 'tx_cnt' packets get transmitted on the respective 'qid'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 60887e5..3ff79c1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1938,17 +1938,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 dpdk_do_tx_copy(netdev, qid, batch);
 dp_packet_delete_batch(batch, may_steal);
 } else {
-int dropped;
-int cnt = batch->count;
+int tx_cnt, dropped;
+int batch_cnt = dp_packet_batch_size(batch);
 struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
 dp_packet_batch_apply_cutlen(batch);
 
-cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
-dropped = batch->count - cnt;
+tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
+tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
+dropped = batch_cnt - tx_cnt;
 
-dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
 
 if (OVS_UNLIKELY(dropped)) {
 rte_spinlock_lock(>stats_lock);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 05/12] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in dp_netdev_run_meter().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..5000f7a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4121,10 +4121,11 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 {
 struct dp_meter *meter;
 struct dp_meter_band *band;
+struct dp_packet *packet;
 long long int long_delta_t; /* msec */
 uint32_t delta_t; /* msec */
 int i;
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 uint32_t bytes, volume;
 int exceeded_band[NETDEV_MAX_BURST];
 uint32_t exceeded_rate[NETDEV_MAX_BURST];
@@ -4157,8 +4158,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 meter->used = now;
 meter->packet_count += cnt;
 bytes = 0;
-for (i = 0; i < cnt; i++) {
-bytes += dp_packet_size(packets_->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+bytes += dp_packet_size(packet);
 }
 meter->byte_count += bytes;
 
@@ -4208,8 +4209,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 } else {
 /* Packet sizes differ, must process one-by-one. */
 band_exceeded_pkt = cnt;
-for (i = 0; i < cnt; i++) {
-uint32_t bits = dp_packet_size(packets_->packets[i]) * 8;
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+uint32_t bits = dp_packet_size(packet) * 8;
 
 if (band->bucket >= bits) {
 band->bucket -= bits;
@@ -4237,10 +4238,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 /* Fire the highest rate band exceeded by each packet.
  * Drop packets if needed, by swapping packet to the end that will be
  * ignored. */
-const size_t size = dp_packet_batch_size(packets_);
-struct dp_packet *packet;
 size_t j;
-DP_PACKET_BATCH_REFILL_FOR_EACH (j, size, packet, packets_) {
+DP_PACKET_BATCH_REFILL_FOR_EACH (j, cnt, packet, packets_) {
 if (exceeded_band[j] >= 0) {
 /* Meter drop packet. */
 band = >bands[exceeded_band[j]];
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 08/12] netdev-dpdk: Cleanup dpdk_do_tx_copy.

2017-09-20 Thread Bhanuprakash Bodireddy
Cleanup dpdk_do_tx_copy().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 60ec528..60887e5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1835,22 +1835,23 @@ static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
+const size_t batch_cnt = dp_packet_batch_size(batch);
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = batch->count;
+const size_t PKT_ARRAY_SIZE = batch_cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-uint32_t cnt = batch->count;
+uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
 cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-  cnt, false);
-dropped += batch->count - cnt;
+  batch_cnt, false);
+dropped += batch_cnt - cnt;
 }
 
 dp_packet_batch_apply_cutlen(batch);
@@ -1858,8 +1859,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 uint32_t txcnt = 0;
 
 for (uint32_t i = 0; i < cnt; i++) {
-
-uint32_t size = dp_packet_size(batch->packets[i]);
+struct dp_packet *packet = batch->packets[i];
+uint32_t size = dp_packet_size(packet);
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(, "Too big size %u max_packet_len %d",
@@ -1870,18 +1871,15 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 }
 
 pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-
-if (!pkts[txcnt]) {
+if (OVS_UNLIKELY(!pkts[txcnt])) {
 dropped += cnt - i;
 break;
 }
 
 /* We have to do a copy for now */
 memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(batch->packets[i]), size);
-
-rte_pktmbuf_data_len(pkts[txcnt]) = size;
-rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
+   dp_packet_data(packet), size);
+dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 
 txcnt++;
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 04/12] netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_bsd_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-bsd.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..96ba71c 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -685,6 +685,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
 struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
 const char *name = netdev_get_name(netdev_);
+struct dp_packet *packet;
 int error;
 int i;
 
@@ -695,9 +696,9 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 error = 0;
 }
 
-for (i = 0; i < batch->count; i++) {
-const void *data = dp_packet_data(batch->packets[i]);
-size_t size = dp_packet_get_send_len(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+const void *data = dp_packet_data(packet);
+size_t size = dp_packet_get_send_len(packet);
 
 while (!error) {
 ssize_t retval;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 03/12] netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_dpdk_ring_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 648d719..60ec528 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2942,14 +2942,14 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
   bool concurrent_txq)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-unsigned i;
+struct dp_packet *packet;
 
 /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that
  * the rss hash field is clear. This is because the same mbuf may be
  * modified by the consumer of the ring and return into the datapath
  * without recalculating the RSS hash. */
-for (i = 0; i < batch->count; i++) {
-dp_packet_mbuf_rss_flag_reset(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_mbuf_rss_flag_reset(packet);
 }
 
 netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 02/12] netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in netdev_linux_tap_batch_send.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_linux_tap_batch_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2b..6769819 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1232,8 +1232,8 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
 struct dp_packet_batch *batch)
 {
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 size_t size = dp_packet_get_send_len(packet);
 ssize_t retval;
 int error;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 01/12] conntrack: Use DP_PACKET_BATCH_FOR_EACH macro.

2017-09-20 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in conntrack_execute().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/conntrack.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 419cb1d..019092e 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1141,17 +1141,16 @@ conntrack_execute(struct conntrack *ct, struct 
dp_packet_batch *pkt_batch,
   long long now)
 {
 
-struct dp_packet **pkts = pkt_batch->packets;
-size_t cnt = pkt_batch->count;
+struct dp_packet *packet;
 struct conn_lookup_ctx ctx;
 
-for (size_t i = 0; i < cnt; i++) {
-if (!conn_key_extract(ct, pkts[i], dl_type, , zone)) {
-pkts[i]->md.ct_state = CS_INVALID;
-write_ct_md(pkts[i], zone, NULL, NULL, NULL);
+DP_PACKET_BATCH_FOR_EACH (packet, pkt_batch) {
+if (!conn_key_extract(ct, packet, dl_type, , zone)) {
+packet->md.ct_state = CS_INVALID;
+write_ct_md(packet, zone, NULL, NULL, NULL);
 continue;
 }
-process_one(ct, pkts[i], , zone, force, commit,
+process_one(ct, packet, , zone, force, commit,
 now, setmark, setlabel, nat_action_info, helper);
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 00/12] Use packet batch macro and APIs.

2017-09-20 Thread Bhanuprakash Bodireddy
DP_PACKET_BATCH_FOR_EACH macro was introduced early this year as part
of enhancing packet batch APIs. Commit '72c84bc2' implemented this macro
and replaced most of the calling sites with macros and simplified the logic.
However, there are still many APIs that need to be fixed.

This patch series is a simple and straightforward set of changes
aimed at using DP_PACKET_BATCH_FOR_EACH macro at all appropriate places.

The second part of the patch series deals with code cleanup and renaming
a few variables to improve the readability of the code. The last patch in
this series fixes an incorrect comment.

No functionality changes and no performance impact with this series.

v1->v2
 * Add patches from below to this series.
   https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338673.html

 * Group the patches by theme in the below order.
6 patches - Use DP_PACKET_BATCH_FOR_EACH
5 patches - Code Cleanup
1 patch   - Fix comment.

Bhanuprakash Bodireddy (12):
  conntrack: Use DP_PACKET_BATCH_FOR_EACH macro.
  netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in
netdev_linux_tap_batch_send.
  netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send
  netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.
  netdev-linux: Clean up netdev_linux_sock_batch_send().
  netdev-dpdk: Cleanup dpdk_do_tx_copy.
  netdev-dpdk: Minor cleanup of netdev_dpdk_send__.
  odp-execute: Use const qualifer for batch size.
  dpif-netdev: Remove 'cnt' in dp_netdev_input__().
  dpif-netdev: Fix comments for pmd_load_cached_ports.

 lib/conntrack.c| 13 ++---
 lib/dpif-netdev.c  | 37 ++---
 lib/netdev-bsd.c   |  7 ---
 lib/netdev-dpdk.c  | 40 +++-
 lib/netdev-linux.c | 17 +
 lib/odp-execute.c  |  3 ++-
 6 files changed, 58 insertions(+), 59 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 10/10] dpif-netdev: Remove 'cnt' in dp_netdev_input__().

2017-09-19 Thread Bhanuprakash Bodireddy
There is little use of 'cnt' variable in dp_netdev_input__(). Get rid of
it and use dp_packet_batch_size() to initialize PKT_ARRAY_SIZE.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 07fca44..35b7a64 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5100,9 +5100,8 @@ dp_netdev_input__(struct dp_netdev_pmd_thread *pmd,
   struct dp_packet_batch *packets,
   bool md_is_valid, odp_port_t port_no)
 {
-int cnt = packets->count;
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = cnt;
+const size_t PKT_ARRAY_SIZE = dp_packet_batch_size(packets);
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 09/10] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in fast_path_processing().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5000f7a..07fca44 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -5001,14 +5001,14 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
  odp_port_t in_port,
  long long now)
 {
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 #if !defined(__CHECKER__) && !defined(_WIN32)
 const size_t PKT_ARRAY_SIZE = cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
-struct dp_packet **packets = packets_->packets;
+struct dp_packet *packet;
 struct dpcls *cls;
 struct dpcls_rule *rules[PKT_ARRAY_SIZE];
 struct dp_netdev *dp = pmd->dp;
@@ -5036,7 +5036,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 ofpbuf_use_stub(, actions_stub, sizeof actions_stub);
 ofpbuf_use_stub(_actions, slow_stub, sizeof slow_stub);
 
-for (i = 0; i < cnt; i++) {
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 struct dp_netdev_flow *netdev_flow;
 
 if (OVS_LIKELY(rules[i])) {
@@ -5055,7 +5055,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 }
 
 miss_cnt++;
-handle_packet_upcall(pmd, packets[i], [i], ,
+handle_packet_upcall(pmd, packet, [i], ,
  _actions, _cnt, now);
 }
 
@@ -5063,17 +5063,16 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 ofpbuf_uninit(_actions);
 fat_rwlock_unlock(>upcall_rwlock);
 } else if (OVS_UNLIKELY(any_miss)) {
-for (i = 0; i < cnt; i++) {
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 if (OVS_UNLIKELY(!rules[i])) {
-dp_packet_delete(packets[i]);
+dp_packet_delete(packet);
 lost_cnt++;
 miss_cnt++;
 }
 }
 }
 
-for (i = 0; i < cnt; i++) {
-struct dp_packet *packet = packets[i];
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
 struct dp_netdev_flow *flow;
 
 if (OVS_UNLIKELY(!rules[i])) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 08/10] dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in dp_netdev_run_meter().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +++
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..5000f7a 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -4121,10 +4121,11 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 {
 struct dp_meter *meter;
 struct dp_meter_band *band;
+struct dp_packet *packet;
 long long int long_delta_t; /* msec */
 uint32_t delta_t; /* msec */
 int i;
-int cnt = packets_->count;
+const size_t cnt = dp_packet_batch_size(packets_);
 uint32_t bytes, volume;
 int exceeded_band[NETDEV_MAX_BURST];
 uint32_t exceeded_rate[NETDEV_MAX_BURST];
@@ -4157,8 +4158,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 meter->used = now;
 meter->packet_count += cnt;
 bytes = 0;
-for (i = 0; i < cnt; i++) {
-bytes += dp_packet_size(packets_->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+bytes += dp_packet_size(packet);
 }
 meter->byte_count += bytes;
 
@@ -4208,8 +4209,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 } else {
 /* Packet sizes differ, must process one-by-one. */
 band_exceeded_pkt = cnt;
-for (i = 0; i < cnt; i++) {
-uint32_t bits = dp_packet_size(packets_->packets[i]) * 8;
+DP_PACKET_BATCH_FOR_EACH (packet, packets_) {
+uint32_t bits = dp_packet_size(packet) * 8;
 
 if (band->bucket >= bits) {
 band->bucket -= bits;
@@ -4237,10 +4238,8 @@ dp_netdev_run_meter(struct dp_netdev *dp, struct 
dp_packet_batch *packets_,
 /* Fire the highest rate band exceeded by each packet.
  * Drop packets if needed, by swapping packet to the end that will be
  * ignored. */
-const size_t size = dp_packet_batch_size(packets_);
-struct dp_packet *packet;
 size_t j;
-DP_PACKET_BATCH_REFILL_FOR_EACH (j, size, packet, packets_) {
+DP_PACKET_BATCH_REFILL_FOR_EACH (j, cnt, packet, packets_) {
 if (exceeded_band[j] >= 0) {
 /* Meter drop packet. */
 band = >bands[exceeded_band[j]];
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 07/10] odp-execute: Use const qualifer for batch size.

2017-09-19 Thread Bhanuprakash Bodireddy
It is recommended to use the const qualifier for 'num', which tracks the
packet batch count. This way 'num' can't be modified by the iterator.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/odp-execute.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 5f4d23a..3109f39 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -826,7 +826,8 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 break;
 }
 case OVS_ACTION_ATTR_DECAP_NSH: {
-size_t i, num = batch->count;
+size_t i;
+const size_t num = dp_packet_batch_size(batch);
 
 DP_PACKET_BATCH_REFILL_FOR_EACH (i, num, packet, batch) {
 if (decap_nsh(packet)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 06/10] netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_bsd_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-bsd.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 8a4cdb3..96ba71c 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -685,6 +685,7 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 {
 struct netdev_bsd *dev = netdev_bsd_cast(netdev_);
 const char *name = netdev_get_name(netdev_);
+struct dp_packet *packet;
 int error;
 int i;
 
@@ -695,9 +696,9 @@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
 error = 0;
 }
 
-for (i = 0; i < batch->count; i++) {
-const void *data = dp_packet_data(batch->packets[i]);
-size_t size = dp_packet_get_send_len(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+const void *data = dp_packet_data(packet);
+size_t size = dp_packet_get_send_len(packet);
 
 while (!error) {
 ssize_t retval;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 05/10] netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_dpdk_ring_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4e75bf1..3ff79c1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2940,14 +2940,14 @@ netdev_dpdk_ring_send(struct netdev *netdev, int qid,
   bool concurrent_txq)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-unsigned i;
+struct dp_packet *packet;
 
 /* When using 'dpdkr' and sending to a DPDK ring, we want to ensure that
  * the rss hash field is clear. This is because the same mbuf may be
  * modified by the consumer of the ring and return into the datapath
  * without recalculating the RSS hash. */
-for (i = 0; i < batch->count; i++) {
-dp_packet_mbuf_rss_flag_reset(batch->packets[i]);
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_mbuf_rss_flag_reset(packet);
 }
 
 netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 04/10] netdev-dpdk: Minor cleanup of netdev_dpdk_send__.

2017-09-19 Thread Bhanuprakash Bodireddy
The variable 'cnt' is initialized and reused in multiple function calls
inside netdev_dpdk_send__(), which can be confusing. Instead, introduce
'batch_cnt' to hold the original packet count and 'tx_cnt' to store
the final packet count remaining after the filtering and QoS operations.

Finally, 'tx_cnt' packets get transmitted on the respective 'qid'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2078c94..4e75bf1 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1938,17 +1938,17 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
 dpdk_do_tx_copy(netdev, qid, batch);
 dp_packet_delete_batch(batch, may_steal);
 } else {
-int dropped;
-int cnt = batch->count;
+int tx_cnt, dropped;
+int batch_cnt = dp_packet_batch_size(batch);
 struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
 
 dp_packet_batch_apply_cutlen(batch);
 
-cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
-cnt = netdev_dpdk_qos_run(dev, pkts, cnt, true);
-dropped = batch->count - cnt;
+tx_cnt = netdev_dpdk_filter_packet_len(dev, pkts, batch_cnt);
+tx_cnt = netdev_dpdk_qos_run(dev, pkts, tx_cnt, true);
+dropped = batch_cnt - tx_cnt;
 
-dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, cnt);
+dropped += netdev_dpdk_eth_tx_burst(dev, qid, pkts, tx_cnt);
 
 if (OVS_UNLIKELY(dropped)) {
 rte_spinlock_lock(>stats_lock);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 03/10] netdev-dpdk: Cleanup dpdk_do_tx_copy.

2017-09-19 Thread Bhanuprakash Bodireddy
Clean up dpdk_do_tx_copy()

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dpdk.c | 22 ++
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 648d719..2078c94 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1835,22 +1835,23 @@ static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
 OVS_NO_THREAD_SAFETY_ANALYSIS
 {
+const size_t batch_cnt = dp_packet_batch_size(batch);
 #if !defined(__CHECKER__) && !defined(_WIN32)
-const size_t PKT_ARRAY_SIZE = batch->count;
+const size_t PKT_ARRAY_SIZE = batch_cnt;
 #else
 /* Sparse or MSVC doesn't like variable length array. */
 enum { PKT_ARRAY_SIZE = NETDEV_MAX_BURST };
 #endif
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
-uint32_t cnt = batch->count;
+uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
 cnt = netdev_dpdk_qos_run(dev, (struct rte_mbuf **) batch->packets,
-  cnt, false);
-dropped += batch->count - cnt;
+  batch_cnt, false);
+dropped += batch_cnt - cnt;
 }
 
 dp_packet_batch_apply_cutlen(batch);
@@ -1858,8 +1859,8 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 uint32_t txcnt = 0;
 
 for (uint32_t i = 0; i < cnt; i++) {
-
-uint32_t size = dp_packet_size(batch->packets[i]);
+struct dp_packet *packet = batch->packets[i];
+uint32_t size = dp_packet_size(packet);
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(, "Too big size %u max_packet_len %d",
@@ -1870,18 +1871,15 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 }
 
 pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-
-if (!pkts[txcnt]) {
+if (OVS_UNLIKELY(!pkts[txcnt])) {
 dropped += cnt - i;
 break;
 }
 
 /* We have to do a copy for now */
 memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(batch->packets[i]), size);
-
-rte_pktmbuf_data_len(pkts[txcnt]) = size;
-rte_pktmbuf_pkt_len(pkts[txcnt]) = size;
+   dp_packet_data(packet), size);
+dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 
 txcnt++;
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 02/10] netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in netdev_linux_tap_batch_send.

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in netdev_linux_tap_batch_send().

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index a1d9e2f..440598b 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1233,8 +1233,8 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
 struct dp_packet_batch *batch)
 {
 struct netdev_linux *netdev = netdev_linux_cast(netdev_);
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 size_t size = dp_packet_get_send_len(packet);
 ssize_t retval;
 int error;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 01/10] netdev-linux: Clean up netdev_linux_sock_batch_send().

2017-09-19 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro and dp_packet_batch_size() API
in netdev_linux_sock_batch_send(). No change in functionality.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-linux.c | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2ff3e2b..a1d9e2f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -1186,16 +1186,17 @@ static int
 netdev_linux_sock_batch_send(int sock, int ifindex,
  struct dp_packet_batch *batch)
 {
+const size_t size = dp_packet_batch_size(batch);
 /* We don't bother setting most fields in sockaddr_ll because the
  * kernel ignores them for SOCK_RAW. */
 struct sockaddr_ll sll = { .sll_family = AF_PACKET,
.sll_ifindex = ifindex };
 
-struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * batch->count);
-struct iovec *iov = xmalloc(sizeof(*iov) * batch->count);
+struct mmsghdr *mmsg = xmalloc(sizeof(*mmsg) * size);
+struct iovec *iov = xmalloc(sizeof(*iov) * size);
 
-for (int i = 0; i < batch->count; i++) {
-struct dp_packet *packet = batch->packets[i];
+struct dp_packet *packet;
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 iov[i].iov_base = dp_packet_data(packet);
 iov[i].iov_len = dp_packet_get_send_len(packet);
 mmsg[i].msg_hdr = (struct msghdr) { .msg_name = ,
@@ -1205,10 +1206,10 @@ netdev_linux_sock_batch_send(int sock, int ifindex,
 }
 
 int error = 0;
-for (uint32_t ofs = 0; ofs < batch->count; ) {
+for (uint32_t ofs = 0; ofs < size; ) {
 ssize_t retval;
 do {
-retval = sendmmsg(sock, mmsg + ofs, batch->count - ofs, 0);
+retval = sendmmsg(sock, mmsg + ofs, size - ofs, 0);
 error = retval < 0 ? errno : 0;
 } while (error == EINTR);
 if (error) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 00/10] Use DP_PACKET_BATCH_FOR_EACH macro.

2017-09-19 Thread Bhanuprakash Bodireddy
DP_PACKET_BATCH_FOR_EACH macro was introduced early this year as part
of enhancing packet batch APIs. Commit '72c84bc2' implemented this macro
and replaced most of the calling sites with macros and simplified the logic.

However, there are still many APIs that need to be fixed.
This patch series is a simple and straightforward set of changes
aimed at using DP_PACKET_BATCH_FOR_EACH macro at all appropriate places.
Also minor code cleanup is done to improve readability of the code.

No functionality changes and no performance impact with this series.

Bhanuprakash Bodireddy (10):
  netdev-linux: Clean up netdev_linux_sock_batch_send().
  netdev-linux: Use DP_PACKET_BATCH_FOR_EACH in
netdev_linux_tap_batch_send.
  netdev-dpdk: Cleanup dpdk_do_tx_copy.
  netdev-dpdk: Minor cleanup of netdev_dpdk_send__.
  netdev-dpdk: Use DP_PACKET_BATCH_FOR_EACH in netdev_dpdk_ring_send
  netdev-bsd: Use DP_PACKET_BATCH_FOR_EACH in netdev_bsd_send.
  odp-execute: Use const qualifer for batch size.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in dp_netdev_run_meter.
  dpif-netdev: Use DP_PACKET_BATCH_FOR_EACH in fast_path_processing.
  dpif-netdev: Remove 'cnt' in dp_netdev_input__().

 lib/dpif-netdev.c  | 33 +++--
 lib/netdev-bsd.c   |  7 ---
 lib/netdev-dpdk.c  | 40 +++-
 lib/netdev-linux.c | 17 +
 lib/odp-execute.c  |  3 ++-
 5 files changed, 49 insertions(+), 51 deletions(-)

-- 
2.4.11
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 10/10] NEWS: Add keepalive support information in NEWS.

2017-09-15 Thread Bhanuprakash Bodireddy
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 NEWS | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/NEWS b/NEWS
index 6a5d2bf..7f5ff78 100644
--- a/NEWS
+++ b/NEWS
@@ -3,6 +3,8 @@ Post-v2.8.0
- OVN:
  * The "requested-chassis" option for a logical switch port now accepts a
chassis "hostname" in addition to a chassis "name".
+   - Userspace Datapath:
+ * Added Keepalive support for userspace datapath.
 
 v2.8.0 - xx xxx 
 -
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 09/10] Documentation: Update DPDK doc with Keepalive feature.

2017-09-15 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing threads by dispatching heartbeats at regular intervals.

The implementation uses OvSDB for reporting the health of the PMD threads.
Any external monitoring application can query the OvSDB for status
at regular intervals (or) subscribe to OvSDB updates.

The keepalive feature can be enabled through the OVSDB settings below.

enable-keepalive=true
  - Keepalive feature is disabled by default and should be enabled
at startup before ovs-vswitchd daemon is started.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, an 'ovs-keepalive' thread is spawned that wakes
up at regular intervals to update the timestamp and status of the pmd
threads in the process map. This information is read by the vswitchd thread
and written into the 'keepalive' column of the Open_vSwitch table in OVSDB.

An external monitoring framework like collectd with ovs events support
can read (or) subscribe to the datapath status changes in ovsdb. When the state
is updated, the collectd shall be notified and will eventually relay the status
to ceilometer service running in the controller. Below is the high level
overview of deployment model.

Compute NodeControllerCompute Node

Collectd  <--> Ceilometer <>   Collectd

OvS DPDK   OvS DPDK

+-+
| VM  |
+--+--+
   \---+---/
   |
+--+---+   ++--+ +--+---+
| OVS  |-> |   ovsevents plugin| --> |   collectd   |
+--+---+   ++--+ +--+---+

+--+-+ +---++ |
| Ceilometer | <-- | collectd ceilometer plugin |  <---
+--+-+ +---++

Performance impact
--
No noticeable performance or latency impact is observed with
KA feature enabled.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 Documentation/howto/dpdk.rst | 113 +++
 1 file changed, 113 insertions(+)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d123819..6fc1316 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -439,6 +439,119 @@ For certain traffic profiles with many parallel flows, 
it's recommended to set
 
 For more information on the EMC refer to :doc:`/intro/install/dpdk` .
 
+.. _dpdk_keepalive:
+
+Keepalive
+-
+
+OvS Keepalive(KA) feature is disabled by default. To enable KA feature::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true
+
+The KA feature can't be enabled at run time and should be done at startup
+before ovs-vswitchd daemon is started.
+
+The default timer interval for monitoring packet processing threads is 1000ms.
+To set a different timer value, run::
+
+$ ovs-vsctl --no-wait set Open_vSwitch . \
+other_config:keepalive-interval="5000"
+
+The events comprise of thread states and the last seen timestamps. The events
+are written in to process map periodically by keepalive thread.
+
+The events in the process map are retrieved by main(vswitchd) thread and
+updated in to keepalive column of Open_vSwitch table in OVSDB. Any external
+monitoring application can read the status from OVSDB at intervals or subscribe
+to the updates so that they get notified when the changes happen on OvSDB.
+
+To monitor the datapath status using ovsdb-client, run::
+
+$ ovsdb-client monitor Open_vSwitch
+$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive
+
+The datapath thread states are explained below::
+
+  KA_STATE_UNUSED  - Not registered to KA framework.
+  KA_STATE_ALIVE   - Thread alive.
+  KA_STATE_MISSING - Thread missed first heartbeat.
+  KA_STATE_DEAD- Thread missed two heartbeats.
+  KA_STATE_GONE- Thread missed two or more heartbeats and buried.
+  KA_STATE_DOZING  - Thread is idle.
+  KA_STATE_SLEEP   - Thread is sleeping.
+
+To query the datapath status, run::
+
+$ ovs-appctl keepalive/pmd-health-show
+
+`collectd <https://collectd.org/>`__ has built-in support for DPDK and provides
+a `ovs_events` and `ovs_stats` plugin that can be enabled to relay the datapath
+status and the PMD status to OpenStack service `Ceilometer
+<https://docs.openstack.org/developer/ceilometer/>`__.
+
+To install and configure `collectd`, run::
+
+# Clone collectd from Git repository
+$ git clone https://github.com/collectd/collectd.git
+
+# configure and install collectd
+$ cd collectd
+$ ./build.sh
+$ ./configure --enable-syslog --enable-logfile --

[ovs-dev] [PATCH v5 08/10] keepalive: Add support to query keepalive status and statistics.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive status and statistics.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status   : Enabled
keepalive interval : 1000 ms
keepalive init time: 21 Aug 2017 16:20:31
PMD threads: 4

 PMDCORESTATE   LAST SEEN TIMESTAMP(UTC)
pmd620  ALIVE   21 Aug 2017 16:29:31
pmd631  ALIVE   21 Aug 2017 16:29:31
pmd642  ALIVE   21 Aug 2017 16:29:31
pmd653  GONE21 Aug 2017 16:26:31

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 103 
 1 file changed, 103 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index b140d21..9db1389 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -18,11 +18,13 @@
 
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -365,6 +367,101 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status   : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval : %"PRIu32" ms\n",
+  get_ka_interval());
+
+char *utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+ka_info.init_time, true);
+ds_put_format(, "keepalive init time: %s \n", utc);
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads: %"PRIu32" \n", 
ka_info->thread_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP(UTC)\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+pinfo->last_seen_time, true);
+
+ds_put_format(, "%s\t%2d\t%s\t%s\n",
+  pinfo->name, pinfo->core_id, state, utc);
+
+free(utc);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
@@ -429,6 +526,12 @@ ka_init(const struct smap *ovs_other_config)
 
 ka_info.init_time = time_wall_msec();
 
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
+
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, _info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 07/10] bridge: Update keepalive status in OVSDB.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

rowaction keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  {
"pmd62"="ALIVE,0,9226457935188922"
"pmd63"="ALIVE,1,150678618"
"pmd64"="ALIVE,2,150678618"
"pmd65"="ALIVE,3,150678618"}

 new  {
"pmd62"="ALIVE,0,9226460230167364"
"pmd63"="ALIVE,1,150679619"
"pmd64"="ALIVE,2,150679619"
"pmd65"="ALIVE,3,150679619"}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index fe81919..b140d21 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -350,6 +350,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 6e6ec68..1f76c34 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -103,6 +103,7 @@ void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index dd9a009..82649c9 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2686,6 +2688,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3039,6 +3064,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 06/10] keepalive: Retrieve PMD status periodically.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: pmdid="status,core id,last_seen_timestamp(epoch)"
  eg: pmd62="ALIVE,2,150332575"
  pmd63="GONE,3,150332525"

The status is periodically retrieved by the keepalive thread and stored in
the keepalive_stats struct, which is later retrieved by the vswitchd thread.
In case of four PMD threads the status is as below:

   "pmd62"="ALIVE,0,150332575"
   "pmd63"="ALIVE,1,150332575"
   "pmd64"="ALIVE,2,150332575"
   "pmd65"="ALIVE,3,150332575"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 66 +++
 lib/keepalive.h   |  1 +
 3 files changed, 68 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index fd0ce61..56a3422 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1038,6 +1038,7 @@ ovs_keepalive(void *f_)
 /* Dispatch heartbeats only if pmd[s] exist. */
 if (hb_enable) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 xnanosleep(interval);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 3067e73..fe81919 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 #include "seq.h"
 #include "timeval.h"
@@ -29,6 +30,9 @@ static bool keepalive_enable = false;  /* Keepalive 
disabled by default. */
 static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
 static struct keepalive_info ka_info;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 /* Returns true if state update is allowed, false otherwise. */
 static bool
 ka_can_update_state(void)
@@ -284,6 +288,68 @@ ka_mark_pmd_thread_sleep(int tid)
 }
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info.proclist_mutex)
+{
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info.process_list) {
+char *state = NULL;
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, pinfo->core_id, pinfo->last_seen_time);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info.proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info.proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 /* Dispatch heartbeats from 'ovs_keepalive' thread. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 392a701..6e6ec68 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ void ka_free_cached_threads(void);
 void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
+void get_ka_stats(void);
 void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 05/10] dpif-netdev: Enable heartbeats for DPDK datapath.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals (as set in ovsdb
with 'keepalive-interval').

The heartbeats are only enabled when there is at least one port added to
the bridge and an active PMD thread is polling the port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 15 +--
 lib/keepalive.c   | 44 
 lib/keepalive.h   |  1 +
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index da419d5..fd0ce61 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1021,14 +1021,25 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void *
-ovs_keepalive(void *f_ OVS_UNUSED)
+ovs_keepalive(void *f_)
 {
+struct dp_netdev *dp = f_;
+
 pthread_detach(pthread_self());
 
 for (;;) {
-int interval;
+int interval, n_pmds;
+bool hb_enable;
 
 interval = get_ka_interval();
+n_pmds = cmap_count(>poll_threads) - 1;
+hb_enable = (n_pmds > 0) ? true : false;
+
+/* Dispatch heartbeats only if pmd[s] exist. */
+if (hb_enable) {
+dispatch_heartbeats();
+}
+
 xnanosleep(interval);
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index da4defd..3067e73 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -284,6 +284,50 @@ ka_mark_pmd_thread_sleep(int tid)
 }
 }
 
+/* Dispatch heartbeats from 'ovs_keepalive' thread. */
+void
+dispatch_heartbeats(void)
+{
+struct ka_process_info *pinfo, *pinfo_next;
+
+/* Iterates over the list of processes in 'cached_process_list' map. */
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node,
+_info.cached_process_list) {
+if (pinfo->state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->state) {
+case KA_STATE_UNUSED:
+break;
+case KA_STATE_ALIVE:
+pinfo->state = KA_STATE_MISSING;
+pinfo->last_seen_time = time_wall_msec();
+break;
+case KA_STATE_MISSING:
+pinfo->state = KA_STATE_DEAD;
+break;
+case KA_STATE_DEAD:
+pinfo->state = KA_STATE_GONE;
+break;
+case KA_STATE_GONE:
+break;
+case KA_STATE_DOZING:
+pinfo->state = KA_STATE_SLEEP;
+pinfo->last_seen_time = time_wall_msec();
+break;
+case KA_STATE_SLEEP:
+break;
+default:
+OVS_NOT_REACHED();
+}
+
+/* Invoke 'ka_update_thread_state' cb function to update state info
+ * in to 'ka_info.process_list' map. */
+ka_info.relay_cb(pinfo->tid, pinfo->state, pinfo->last_seen_time);
+}
+}
+
 void
 ka_init(const struct smap *ovs_other_config)
 {
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 9e8bfdf..392a701 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -102,6 +102,7 @@ void ka_free_cached_threads(void);
 void ka_cache_registered_threads(void);
 void ka_mark_pmd_thread_alive(int);
 void ka_mark_pmd_thread_sleep(int);
+void dispatch_heartbeats(void);
 void ka_init(const struct smap *);
 void ka_destroy(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 04/10] dpif-netdev: Register packet processing cores to KA framework.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD threads to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD threads. The pmd threads respond to heartbeats by marking themselves
alive. As long as a PMD responds to heartbeats, it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  79 ++
 lib/keepalive.c   | 191 --
 lib/keepalive.h   |  20 ++
 lib/ovs-thread.c  |   6 ++
 lib/ovs-thread.h  |   1 +
 lib/util.c|  22 +++
 lib/util.h|   1 +
 7 files changed, 316 insertions(+), 4 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..da419d5 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -49,6 +49,7 @@
 #include "flow.h"
 #include "hmapx.h"
 #include "id-pool.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -591,6 +592,7 @@ struct dp_netdev_pmd_thread {
 uint64_t last_reload_seq;
 atomic_bool reload; /* Do we need to reload ports? */
 pthread_t thread;
+pid_t tid;  /* Thread id of this pmd thread. */
 unsigned core_id;   /* CPU core id of this pmd thread. */
 int numa_id;/* numa node id of this pmd thread. */
 bool isolated;
@@ -1018,6 +1020,72 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+int interval;
+
+interval = get_ka_interval();
+xnanosleep(interval);
+}
+
+return NULL;
+}
+
+/* Kickstart 'ovs_keepalive' thread. */
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+/* Register the datapath threads. This gets invoked on every datapath
+ * reconfiguration. The pmd thread[s] having rxq[s] mapped will be
+ * registered to KA framework.
+ */
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+if (!ka_is_enabled()) {
+return;
+}
+
+ka_thread_start(dp);
+
+ka_reload_datapath_threads_begin();
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+/* Rxq mapping changes due to datapath reconfiguration.
+ * If no rxqs mapped to PMD now due to reconfiguration,
+ * unregister the pmd thread. */
+ka_unregister_thread(pmd->tid);
+continue;
+}
+
+ka_register_thread(pmd->tid);
+VLOG_INFO("Registered PMD thread [%d] on Core[%d] to KA framework",
+  pmd->tid, pmd->core_id);
+}
+}
+ka_cache_registered_threads();
+
+ka_reload_datapath_threads_end();
+}
+
 static void
 dpif_netdev_pmd_rebalance(struct unixctl_conn *conn, int argc,
   const char *argv[], void *aux OVS_UNUSED)
@@ -3821,6 +3889,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -4023,6 +4094,8 @@ pmd_thread_main(void *f_)
 
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
+/* Stores tid in to 'pmd->tid'. */
+ovsthread_settid(>tid);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
 dpdk_set_lcore_id(pmd->core_id);
 poll_cnt = pmd_load_queues_and_ports(pmd, _list);
@@ -4056,6 +4129,9 @@ reload:
   : PMD_CYCLES_IDLE);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive(pmd->tid);
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -4089,6 +4165,9 @@ reload:
 }
 
 emc_cache_uninit(>flow_cache);
+
+ka_unregister_thread(pmd->tid);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 1f151f6..da4defd 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -19,6 +19,7 @@
 #include "keepalive.h"
 #include "

[ovs-dev] [PATCH v5 03/10] util: Add high resolution sleep support.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit introduces xnanosleep() for the threads needing high
resolution sleep timeouts.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/timeval.c |  2 +-
 lib/timeval.h |  1 +
 lib/util.c| 19 +++
 lib/util.h|  1 +
 4 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/lib/timeval.c b/lib/timeval.c
index dd63f03..8190b41 100644
--- a/lib/timeval.c
+++ b/lib/timeval.c
@@ -474,7 +474,7 @@ xclock_gettime(clock_t id, struct timespec *ts)
 }
 }
 
-static void
+void
 msec_to_timespec(long long int ms, struct timespec *ts)
 {
 ts->tv_sec = ms / 1000;
diff --git a/lib/timeval.h b/lib/timeval.h
index 7957dad..110745c 100644
--- a/lib/timeval.h
+++ b/lib/timeval.h
@@ -69,6 +69,7 @@ size_t strftime_msec(char *s, size_t max, const char *format,
  const struct tm_msec *);
 void xgettimeofday(struct timeval *);
 void xclock_gettime(clock_t, struct timespec *);
+void msec_to_timespec(long long int , struct timespec *);
 
 int get_cpu_usage(void);
 
diff --git a/lib/util.c b/lib/util.c
index 36e3731..4ad7eea 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -2197,6 +2197,25 @@ xsleep(unsigned int seconds)
 ovsrcu_quiesce_end();
 }
 
+/* High resolution sleep. */
+void
+xnanosleep(uint64_t ms)
+{
+ovsrcu_quiesce_start();
+#ifdef __linux__
+int retval;
+struct timespec ts_sleep;
+msec_to_timespec(ms, _sleep);
+
+int error = 0;
+do {
+retval = nanosleep(_sleep, NULL);
+error = retval < 0 ? errno : 0;
+} while (error == EINTR);
+#endif
+ovsrcu_quiesce_end();
+}
+
 /* Determine whether standard output is a tty or not. This is useful to decide
  * whether to use color output or not when --color option for utilities is set
  * to `auto`.
diff --git a/lib/util.h b/lib/util.h
index 764e0a0..0449fa1 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -489,6 +489,7 @@ ovs_u128_and(const ovs_u128 a, const ovs_u128 b)
 }
 
 void xsleep(unsigned int seconds);
+void xnanosleep(uint64_t ms);
 
 bool is_stdout_a_tty(void);
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5 02/10] Keepalive: Add initial keepalive support.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit introduces the initial keepalive support by adding
'keepalive' module and also helper and initialization functions
that will be invoked by later commits.

This commit adds new ovsdb column "keepalive" that shows the status
of the datapath threads. This is implemented for DPDK datapath and
only status of PMD threads is reported.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/automake.mk|   2 +
 lib/keepalive.c| 145 +
 lib/keepalive.h|  88 +++
 vswitchd/bridge.c  |   3 +
 vswitchd/vswitch.ovsschema |   8 ++-
 vswitchd/vswitch.xml   |  49 +++
 6 files changed, 293 insertions(+), 2 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 2415f4c..0d99f0a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..1f151f6
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,145 @@
+/*
+ * Copyright (c) 2017 Intel, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "seq.h"
+#include "timeval.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;  /* Keepalive disabled by default. */
+static uint32_t keepalive_timer_interval;  /* keepalive timer interval. */
+static struct keepalive_info ka_info;
+
+/* Returns true if keepalive is enabled, false otherwise. */
+bool
+ka_is_enabled(void)
+{
+return keepalive_enable;
+}
+
+/* Finds the thread by 'tid' in 'process_list' map and update
+ * the thread state and last_seen_time stamp.  This is invoked
+ * periodically(based on keepalive-interval) as part of callback
+ * function in the context of keepalive thread.
+ */
+static void
+ka_set_thread_state_ts(pid_t tid, enum keepalive_state state,
+   uint64_t last_alive)
+{
+struct ka_process_info *pinfo;
+
+ovs_mutex_lock(_info.proclist_mutex);
+HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+ _info.process_list) {
+if (pinfo->tid == tid) {
+pinfo->state = state;
+pinfo->last_seen_time = last_alive;
+}
+}
+ovs_mutex_unlock(_info.proclist_mutex);
+}
+
+/* Retrieve and return the keepalive timer interval from OVSDB. */
+static uint32_t
+ka_get_timer_interval(const struct smap *ovs_other_config)
+{
+uint32_t ka_interval;
+
+/* Timer granularity in milliseconds
+ * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */
+ka_interval = smap_get_int(ovs_other_config, "keepalive-interval",
+   OVS_KEEPALIVE_DEFAULT_TIMEOUT);
+
+VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
+return ka_interval;
+}
+
+/*
+ * This function is invoked periodically to write the status and
+ * last seen timestamp of the thread in to 'process_list' map.
+ */
+static void
+ka_update_thread_state(pid_t tid, const enum keepalive_state state,
+   uint64_t last_alive)
+{
+switch (state) {
+case KA_STATE_ALIVE:
+case KA_STATE_MISSING:
+ka_set_thread_state_ts(tid, KA_STATE_ALIVE, last_alive);
+break;
+case KA_STATE_UNUSED:
+case KA_STATE_DOZING:
+case KA_STATE_SLEEP:
+case KA_STATE_DEAD:
+case KA_STATE_GONE:
+ka_set_thread_state_ts(tid, state, last_alive);
+break;
+default:
+OVS_NOT_REACHED();
+}
+}
+
+/* Register relay callback function. */
+static void
+keepalive_register_relay_cb(ka_relay_cb cb, void *aux)
+{
+ka_info.relay_cb = cb;
+ka_info.relay_cb_data = aux;
+}
+
+void
+ka_init(const struct smap *ovs_other_config)
+{
+if (smap_get_bool(ovs_other_config, "enable-keepalive", false)) {
+static struct ovsthread_once once_enable = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start(_enab

[ovs-dev] [PATCH v5 01/10] process: Extend get_process_info() for additional fields.

2017-09-15 Thread Bhanuprakash Bodireddy
This commit enables the fields relating to the process name and the core
number the process was last scheduled on. The fields will be used by the
keepalive monitoring framework in future commits.

This commit also fixes the following "sparse" warning:

  lib/process.c:439:16: error: use of assignment suppression and length
  modifier together in gnu_scanf format [-Werror=format=].

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 43 +++
 lib/process.h |  2 ++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index 3e119b5..95df112 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -64,7 +64,8 @@ struct raw_process_info {
 long long int uptime;   /* ms since started. */
 long long int cputime;  /* ms of CPU used during 'uptime'. */
 pid_t ppid; /* Parent. */
-char name[18];  /* Name (surrounded by parentheses). */
+int core_id;/* Core id last executed on. */
+char name[18];  /* Name. */
 };
 
 /* Pipe used to signal child termination. */
@@ -421,7 +422,7 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
 
 n = fscanf(stream,
"%*d "   /* (1. pid) */
-   "%17s "  /* 2. process name */
+   "(%17[^)]) " /* 2. process name */
"%*c "   /* (3. state) */
"%lu "   /* 4. ppid */
"%*d "   /* (5. pgid) */
@@ -444,33 +445,34 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
"%llu "  /* 22. start_time */
"%llu "  /* 23. vsize */
"%llu "  /* 24. rss */
+   "%*u "   /* (25. rsslim) */
+   "%*u "   /* (26. start_code) */
+   "%*u "   /* (27. end_code) */
+   "%*u "   /* (28. start_stack) */
+   "%*u "   /* (29. esp) */
+   "%*u "   /* (30. eip) */
+   "%*u "   /* (31. pending signals) */
+   "%*u "   /* (32. blocked signals) */
+   "%*u "   /* (33. ignored signals) */
+   "%*u "   /* (34. caught signals) */
+   "%*u "   /* (35. whcan) */
+   "%*u "   /* (36. always 0) */
+   "%*u "   /* (37. always 0) */
+   "%*d "   /* (38. exit_signal) */
+   "%d "/* 39. task_cpu */
 #if 0
/* These are here for documentation but #if'd out to save
 * actually parsing them from the stream for no benefit. */
-   "%*lu "  /* (25. rsslim) */
-   "%*lu "  /* (26. start_code) */
-   "%*lu "  /* (27. end_code) */
-   "%*lu "  /* (28. start_stack) */
-   "%*lu "  /* (29. esp) */
-   "%*lu "  /* (30. eip) */
-   "%*lu "  /* (31. pending signals) */
-   "%*lu "  /* (32. blocked signals) */
-   "%*lu "  /* (33. ignored signals) */
-   "%*lu "  /* (34. caught signals) */
-   "%*lu "  /* (35. whcan) */
-   "%*lu "  /* (36. always 0) */
-   "%*lu "  /* (37. always 0) */
-   "%*d "   /* (38. exit_signal) */
-   "%*d "   /* (39. task_cpu) */
"%*u "   /* (40. rt_priority) */
"%*u "   /* (41. policy) */
"%*llu " /* (42. blkio_ticks) */
"%*lu "  /* (43. gtime) */
"%*ld"   /* (44. cgtime) */
 #endif
-   , raw->name, , , , _time, , );
+   , raw->name, , , , _time,
+  , , >core_id);
 fclose(stream);
-if (n != 7) {
+if (n != 8) {
 VLOG_ERR_ONCE("%s: fscanf failed", file_name);
 return false;
 }
@@ -496,12 +498,14 @@ get_process_info(pid_t pid, struct process_info *pinfo)
 return false;
 }
 
+ovs_strlcpy(pinfo->name, child.name, sizeof pinfo->name);
 pinfo->vsz = child.vsz;
 pinfo->rss = child.rss;
 pinfo->booted = child.uptime;
 pinfo->crashes = 0;
 pinfo->uptime = child.uptime;
   

[ovs-dev] [PATCH v5 00/10] Add OVS DPDK keep-alive functionality.

2017-09-15 Thread Bhanuprakash Bodireddy
Keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing threads by dispatching heartbeats at regular intervals.
 
The keepalive feature can be enabled through the OVSDB settings below.

enable-keepalive=true
  - Keepalive feature is disabled by default and should be enabled
at startup before ovs-vswitchd daemon is started.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

v4 -> v5
  * Add 3 more patches to the series
 - xnanosleep()
 - Documentation
 - Update to NEWS
  * Remove all references to core_id and instead implement thread-based
tracking.
  * Addressed most of the comments in v4.

v3 -> v4
  * Split the functionality into 2 parts. This patch series only updates
PMD status to OVSDB. An incremental patch series will handle false positives,
false negatives, and additional checking and stats.
  * Remove code from netdev layer and dependency on rte_keepalive lib.
  * Merged few patches and simplified the patch series.
  * Timestamp in human readable form.

v2 -> v3
  * Rebase.
  * Verified with dpdk-stable-17.05.1 release.
  * Fixed build issues with MSVC and cross checked with appveyor.

v1 -> v2
  * Rebase
  * Drop 01/20 Patch "Consolidate process related APIs" of V1 as it
is already applied as separate patch.

RFCv3 -> v1
  * Made changes to fix failures in some unit test cases.
  * some more code cleanup w.r.t process related APIs.

RFCv2 -> RFCv3
  * Remove POSIX shared memory block implementation (suggested by Aaron).
  * Rework the logic to register and track threads instead of cores. This way
in the future any thread can be registered to KA framework. For now only PMD
threads are tracked (suggested by Aaron).
  * Refactor few APIs and further clean up the code.
   
RFCv1 -> RFCv2
  * Merged the xml and schema commits to later commit where the actual
implementation is done(suggested by Ben).
  * Fix ovs-appctl keepalive/* hang issue when KA disabled.
  * Fixed memory leaks with appctl commands for keepalive/pmd-health-show,
pmd-xstats-show.
  * Refactor code and fixed APIs dealing with PMD health monitoring.


Bhanuprakash Bodireddy (10):
  process: Extend get_process_info() for additional fields.
  Keepalive: Add initial keepalive support.
  util: Add high resolution sleep support.
  dpif-netdev: Register packet processing cores to KA framework.
  dpif-netdev: Enable heartbeats for DPDK datapath.
  keepalive: Retrieve PMD status periodically.
  bridge: Update keepalive status in OVSDB.
  keepalive: Add support to query keepalive status and statistics.
  Documentation: Update DPDK doc with Keepalive feature.
  NEWS: Add keepalive support information in NEWS.

 Documentation/howto/dpdk.rst | 113 +
 NEWS |   2 +
 lib/automake.mk  |   2 +
 lib/dpif-netdev.c|  91 +++
 lib/keepalive.c  | 556 +++
 lib/keepalive.h  | 111 +
 lib/ovs-thread.c |   6 +
 lib/ovs-thread.h |   1 +
 lib/process.c|  43 ++--
 lib/process.h|   2 +
 lib/timeval.c|   2 +-
 lib/timeval.h|   1 +
 lib/util.c   |  41 
 lib/util.h   |   2 +
 vswitchd/bridge.c|  29 +++
 vswitchd/vswitch.ovsschema   |   8 +-
 vswitchd/vswitch.xml |  49 
 17 files changed, 1036 insertions(+), 23 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] dpif-netdev: Fix comments for pmd_load_cached_ports.

2017-09-12 Thread Bhanuprakash Bodireddy
Commit 57eebbb4c315 replaces thread local 'pmd->port_cache' with
'pmd->tnl_port_cache' and 'pmd->send_port_cache' maps. Update the
comments accordingly.

Fixes: 57eebbb4c315 ("Don't try to output on a device without txqs")
Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index ca74df8..11d3ab4 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3933,7 +3933,9 @@ pmd_free_cached_ports(struct dp_netdev_pmd_thread *pmd)
 }
 
 /* Copies ports from 'pmd->tx_ports' (shared with the main thread) to
- * 'pmd->port_cache' (thread local) */
+ * thread-local copies. Copy to 'pmd->tnl_port_cache' if it is a tunnel
+ * device, otherwise to 'pmd->send_port_cache' if the port has atleast
+ * one txq. */
 static void
 pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
 OVS_REQUIRES(pmd->port_mutex)
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] conntrack: Use DP_PACKET_BATCH_FOR_EACH macro.

2017-09-12 Thread Bhanuprakash Bodireddy
Use DP_PACKET_BATCH_FOR_EACH macro in conntrack_execute(). No change in
functionality.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/conntrack.c | 13 ++---
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 419cb1d..019092e 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1141,17 +1141,16 @@ conntrack_execute(struct conntrack *ct, struct 
dp_packet_batch *pkt_batch,
   long long now)
 {
 
-struct dp_packet **pkts = pkt_batch->packets;
-size_t cnt = pkt_batch->count;
+struct dp_packet *packet;
 struct conn_lookup_ctx ctx;
 
-for (size_t i = 0; i < cnt; i++) {
-if (!conn_key_extract(ct, pkts[i], dl_type, , zone)) {
-pkts[i]->md.ct_state = CS_INVALID;
-write_ct_md(pkts[i], zone, NULL, NULL, NULL);
+DP_PACKET_BATCH_FOR_EACH (packet, pkt_batch) {
+if (!conn_key_extract(ct, packet, dl_type, , zone)) {
+packet->md.ct_state = CS_INVALID;
+write_ct_md(packet, zone, NULL, NULL, NULL);
 continue;
 }
-process_one(ct, pkts[i], , zone, force, commit,
+process_one(ct, packet, , zone, force, commit,
 now, setmark, setlabel, nat_action_info, helper);
 }
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 13/13] ofproto-dpif-xlate: Fix dead assignment reported by clang.

2017-09-08 Thread Bhanuprakash Bodireddy
Clang reports that the value stored to 'ac_offset' is never read in the
function.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/ofproto-dpif-xlate.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 9e1f837..5d83666 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -5370,7 +5370,6 @@ compose_clone(struct xlate_ctx *ctx, const struct 
ofpact_nest *oc)
 if (ctx->xbridge->support.clone) { /* Use clone action */
 /* Use clone action as datapath clone. */
 offset = nl_msg_start_nested(ctx->odp_actions, OVS_ACTION_ATTR_CLONE);
-ac_offset = ctx->odp_actions->size;
 do_xlate_actions(oc->actions, oc_actions_len, ctx);
 nl_msg_end_non_empty_nested(ctx->odp_actions, offset);
 goto dp_clone_done;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 12/13] conntrack: Fix dead assignment reported by clang.

2017-09-08 Thread Bhanuprakash Bodireddy
Clang reports that the values stored to 'ftp' and 'seq_skew_dir' are never
read inside the respective functions.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/conntrack.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 419cb1d..a0838ee 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2615,7 +2615,7 @@ process_ftp_ctl_v4(struct conntrack *ct,
 char ftp_msg[LARGEST_FTP_MSG_OF_INTEREST + 1] = {0};
 get_ftp_ctl_msg(pkt, ftp_msg);
 
-char *ftp = ftp_msg;
+char *ftp;
 enum ct_alg_mode mode;
 if (!strncasecmp(ftp_msg, FTP_PORT_CMD, strlen(FTP_PORT_CMD))) {
 ftp = ftp_msg + strlen(FTP_PORT_CMD);
@@ -2761,7 +2761,7 @@ process_ftp_ctl_v6(struct conntrack *ct,
 get_ftp_ctl_msg(pkt, ftp_msg);
 *ftp_data_start = tcp_hdr + tcp_hdr_len;
 
-char *ftp = ftp_msg;
+char *ftp;
 struct in6_addr ip6_addr;
 if (!strncasecmp(ftp_msg, FTP_EPRT_CMD, strlen(FTP_EPRT_CMD))) {
 ftp = ftp_msg + strlen(FTP_EPRT_CMD);
@@ -2909,7 +2909,6 @@ handle_ftp_ctl(struct conntrack *ct, const struct 
conn_lookup_ctx *ctx,
 bool seq_skew_dir;
 if (ftp_ctl == CT_FTP_CTL_OTHER) {
 seq_skew = conn_for_expectation->seq_skew;
-seq_skew_dir = conn_for_expectation->seq_skew_dir;
 } else if (ftp_ctl == CT_FTP_CTL_INTEREST) {
 enum ftp_ctl_pkt rc;
 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 11/13] ofproto: Reorder elements in ofproto_bundle_settings structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in ofproto_bundle_settings structure, sum holes
and pad bytes can be reduced.

Before: structure size: 96, sum holes: 13, pad bytes: 7, cachelines:2
After : structure size: 80, sum holes:  4, pad bytes: 0, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/ofproto.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 9e35327..2a7c1f3 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -403,18 +403,17 @@ struct ofproto_bundle_settings {
 size_t n_slaves;
 
 enum port_vlan_mode vlan_mode; /* Selects mode for vlan and trunks */
+bool protected; /* Protected port mode */
+bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */
 uint16_t qinq_ethtype;
 int vlan;   /* VLAN VID, except for PORT_VLAN_TRUNK. */
 unsigned long *trunks;  /* vlan_bitmap, except for PORT_VLAN_ACCESS. */
 unsigned long *cvlans;
-bool use_priority_tags; /* Use 802.1p tag for frames in VLAN 0? */
 
 struct bond_settings *bond; /* Must be nonnull iff if n_slaves > 1. */
 
 struct lacp_settings *lacp;  /* Nonnull to enable LACP. */
 struct lacp_slave_settings *lacp_slaves; /* Array of n_slaves elements. */
-
-bool protected; /* Protected port mode */
 };
 
 int ofproto_bundle_register(struct ofproto *, void *aux,
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 10/13] ofproto: Reorder elements in ofproto_ipfix_flow_exporter_options structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in ofproto_ipfix_flow_exporter_options structure,
sum holes can be reduced significantly.

Before: structure size: 64, sum holes: 11, cachelines:1
After : structure size: 56, sum holes:  3, cachelines:1

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/ofproto.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ofproto/ofproto.h b/ofproto/ofproto.h
index 1e48e19..9e35327 100644
--- a/ofproto/ofproto.h
+++ b/ofproto/ofproto.h
@@ -87,10 +87,10 @@ struct ofproto_ipfix_bridge_exporter_options {
 
 struct ofproto_ipfix_flow_exporter_options {
 uint32_t collector_set_id;
+bool enable_tunnel_sampling;
 struct sset targets;
 uint32_t cache_active_timeout;
 uint32_t cache_max_flows;
-bool enable_tunnel_sampling;
 char *virtual_obs_id;
 };
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 08/13] dpif-netdev: Reorder elements in dp_netdev_rxq structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in dp_netdev_rxq structure, pad bytes and a hole
can be removed.

Before: structure size: 104, sum holes: 1, sum padbytes:4, cachelines:2
After : structure size:  96, sum holes: 0, sum padbytes:0, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 071ec14..b1ef0fb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -366,6 +366,7 @@ struct dp_netdev_rxq {
   pinned. OVS_CORE_UNSPEC if the
   queue doesn't need to be pinned to a
   particular core. */
+unsigned intrvl_idx;   /* Write index for 'cycles_intrvl'. */
 struct dp_netdev_pmd_thread *pmd;  /* pmd thread that polls this queue. */
 
 /* Counters of cycles spent successfully polling and processing pkts. */
@@ -373,7 +374,6 @@ struct dp_netdev_rxq {
 /* We store PMD_RXQ_INTERVAL_MAX intervals of data for an rxq and then
sum them to yield the cycles used for an rxq. */
 atomic_ullong cycles_intrvl[PMD_RXQ_INTERVAL_MAX];
-unsigned intrvl_idx;   /* Write index for 'cycles_intrvl'. */
 };
 
 /* A port in a netdev-based datapath. */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 09/13] dpif: Reorder elements in dpif_flow_put structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in dpif_flow_put structure, holes can be removed.

Before: structure size: 80, sum holes: 8, cachelines:2
After : structure size: 72, sum holes: 0, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dpif.h b/lib/dpif.h
index d9ded8b..bef7845 100644
--- a/lib/dpif.h
+++ b/lib/dpif.h
@@ -638,6 +638,7 @@ enum dpif_op_type {
  */
 struct dpif_flow_put {
 /* Input. */
+unsigned pmd_id;/* Datapath poll mode driver id. */
 enum dpif_flow_put_flags flags; /* DPIF_FP_*. */
 const struct nlattr *key;   /* Flow to put. */
 size_t key_len; /* Length of 'key' in bytes. */
@@ -646,7 +647,6 @@ struct dpif_flow_put {
 const struct nlattr *actions;   /* Actions to perform on flow. */
 size_t actions_len; /* Length of 'actions' in bytes. */
 const ovs_u128 *ufid;   /* Optional unique flow identifier. */
-unsigned pmd_id;/* Datapath poll mode driver id. */
 
 /* Output. */
 struct dpif_flow_stats *stats;  /* Optional flow statistics. */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 07/13] netdev-provider: Reorder elements in netdev structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in netdev structure, holes can be removed.

Before: structure size: 88, sum holes: 10, cachelines:2
After : structure size: 80, sum holes:  2, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-provider.h | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index 5d7bd4f..1720deb 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -49,6 +49,12 @@ struct netdev {
  * opening this device, and therefore got assigned to the "system" class */
 bool auto_classified;
 
+/* If this is 'true', the user explicitly specified an MTU for this
+ * netdev.  Otherwise, Open vSwitch is allowed to override it. */
+bool mtu_user_config;
+
+int ref_cnt;/* Times this devices was opened. */
+
 /* A sequence number which indicates changes in one of 'netdev''s
  * properties.   It must be nonzero so that users have a value which
  * they may use as a reset when tracking 'netdev'.
@@ -67,16 +73,11 @@ struct netdev {
 struct seq *reconfigure_seq;
 uint64_t last_reconfigure_seq;
 
-/* If this is 'true', the user explicitly specified an MTU for this
- * netdev.  Otherwise, Open vSwitch is allowed to override it. */
-bool mtu_user_config;
-
 /* The core netdev code initializes these at netdev construction and only
  * provide read-only access to its client.  Netdev implementations may
  * modify them. */
 int n_txq;
 int n_rxq;
-int ref_cnt;/* Times this devices was opened. */
 struct shash_node *node;/* Pointer to element in global map. */
 struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/
 };
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 05/13] netdev: Reorder elements in netdev_tunnel_config structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in netdev_tunnel_config structure, sum holes and
pad bytes can be reduced.

Before: structure size: 96, sum holes: 17, pad bytes: 4, cachelines:2
After : structure size: 80, sum holes:  5, pad bytes: 0, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/lib/netdev.h b/lib/netdev.h
index f8482f7..3a545fe 100644
--- a/lib/netdev.h
+++ b/lib/netdev.h
@@ -99,9 +99,9 @@ enum netdev_pt_mode {
 
 /* Configuration specific to tunnels. */
 struct netdev_tunnel_config {
+ovs_be64 in_key;
 bool in_key_present;
 bool in_key_flow;
-ovs_be64 in_key;
 
 bool out_key_present;
 bool out_key_flow;
@@ -115,8 +115,8 @@ struct netdev_tunnel_config {
 struct in6_addr ipv6_dst;
 
 uint32_t exts;
-bool set_egress_pkt_mark;
 uint32_t egress_pkt_mark;
+bool set_egress_pkt_mark;
 
 uint8_t ttl;
 bool ttl_inherit;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 04/13] ct-dpif: Reorder elements in ct_dpif_entry structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in ct_dpif_entry structure, sum holes and pad
bytes can be reduced.

Before: structure size: 232, sum holes: 5, pad bytes: 4, cachelines:4
After : structure size: 224, sum holes: 1, pad bytes: 0, cachelines:4

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/ct-dpif.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/ct-dpif.h b/lib/ct-dpif.h
index d5f9661..ba4cb39 100644
--- a/lib/ct-dpif.h
+++ b/lib/ct-dpif.h
@@ -156,6 +156,8 @@ struct ct_dpif_entry {
 uint16_t zone;
 
 /* Modifiable members. */
+bool have_labels;
+ovs_u128 labels;
 
 struct ct_dpif_counters counters_orig;
 struct ct_dpif_counters counters_reply;
@@ -163,9 +165,8 @@ struct ct_dpif_entry {
 struct ct_dpif_timestamp timestamp;
 struct ct_dpif_protoinfo protoinfo;
 
-ovs_u128 labels;
-bool have_labels;
 uint32_t status;
+
 /* Timeout for this entry in seconds */
 uint32_t timeout;
 uint32_t mark;
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 03/13] tun-metadata: Reorder elements in tun_meta_entry structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in tun_meta_entry structure, sum holes and pad
bytes can be reduced, thereby reducing the tun_table size.

Before: structure size: 56, sum holes: 4, pad bytes: 7, cachelines:1
After : structure size: 48, sum holes: 0, pad bytes: 3, cachelines:1

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/tun-metadata.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/tun-metadata.c b/lib/tun-metadata.c
index 2fbd9b6..0cb2456 100644
--- a/lib/tun-metadata.c
+++ b/lib/tun-metadata.c
@@ -32,8 +32,8 @@
 
 struct tun_meta_entry {
 struct hmap_node node;  /* In struct tun_table's key_hmap. */
-uint32_t key;   /* (class << 8) | type. */
 struct tun_metadata_loc loc;
+uint32_t key;   /* (class << 8) | type. */
 bool valid; /* True if allocated to a class and type. */
 };
 
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 02/13] netdev-dummy: Reorder elements in dummy_packet_stream structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in dummy_packet_stream structure, sum holes
can be reduced, thus saving a cache line.

Before: structure size: 784, sum holes: 56, cachelines:13
After : structure size: 768, sum holes: 40, cachelines:12

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/netdev-dummy.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index f731af1..d888c40 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -50,8 +50,8 @@ struct reconnect;
 
 struct dummy_packet_stream {
 struct stream *stream;
-struct dp_packet rxbuf;
 struct ovs_list txq;
+struct dp_packet rxbuf;
 };
 
 enum dummy_packet_conn_type {
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 01/13] bond: Reorder elements in bond_slave structure.

2017-09-08 Thread Bhanuprakash Bodireddy
By reordering elements in bond_slave structure, holes can be removed,
saving a cache line.

Before: structure size: 136, sum holes: 10, cachelines:3
After : structure size: 128, sum holes:  2, cachelines:2

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 ofproto/bond.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ofproto/bond.c b/ofproto/bond.c
index 365a3ca..a656226 100644
--- a/ofproto/bond.c
+++ b/ofproto/bond.c
@@ -88,13 +88,13 @@ struct bond_slave {
 
 struct netdev *netdev;  /* Network device, owned by the client. */
 uint64_t change_seq;/* Tracks changes in 'netdev'. */
-ofp_port_t  ofp_port;   /* OpenFlow port number. */
 char *name; /* Name (a copy of netdev_get_name(netdev)). */
+ofp_port_t  ofp_port;   /* OpenFlow port number. */
 
 /* Link status. */
-long long delay_expires;/* Time after which 'enabled' may change. */
 bool enabled;   /* May be chosen for flows? */
 bool may_enable;/* Client considers this slave bondable. */
+long long delay_expires;/* Time after which 'enabled' may change. */
 
 /* Rebalancing info.  Used only by bond_rebalance(). */
 struct ovs_list bal_node;   /* In bond_rebalance()'s 'bals' list. */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 00/13] Rearrange structure members for memory efficiency.

2017-09-08 Thread Bhanuprakash Bodireddy
This is a simple patch series that reorders members in some structures
to reduce/remove holes and pad bytes, thus saving memory footprint.
This series doesn't change any functionality and care has been taken not
to disturb the grouping of related members variables in structures.

Please note that no performance impact is observed with the new ordering.

Bhanuprakash Bodireddy (13):
  bond: Reorder elements in bond_slave structure.
  netdev-dummy: Reorder elements in dummy_packet_stream structure.
  tun-metadata: Reorder elements in tun_meta_entry structure.
  ct-dpif: Reorder elements in ct_dpif_entry structure.
  netdev: Reorder elements in netdev_tunnel_config structure.
  netdev-provider: Reorder element in netdev_flow_dump structure.
  netdev-provider: Reorder elements in netdev structure.
  dpif-netdev: Reorder elements in dp_netdev_rxq structure.
  dpif: Reorder elements in dpif_flow_put structure.
  ofproto: Reorder elements in ofproto_ipfix_flow_exporter_options
structure.
  ofproto: Reorder elements in ofproto_bundle_settings structure.
  conntrack: Fix dead assignment reported by clang.
  ofproto-dpif-xlate: Fix dead assignment reported by clang.

 lib/conntrack.c  |  5 ++---
 lib/ct-dpif.h|  5 +++--
 lib/dpif-netdev.c|  2 +-
 lib/dpif.h   |  2 +-
 lib/netdev-dummy.c   |  2 +-
 lib/netdev-provider.h| 13 +++--
 lib/netdev.h |  4 ++--
 lib/tun-metadata.c   |  2 +-
 ofproto/bond.c   |  4 ++--
 ofproto/ofproto-dpif-xlate.c |  1 -
 ofproto/ofproto.h|  7 +++
 11 files changed, 23 insertions(+), 24 deletions(-)

-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 7/7] keepalive: Add support to query keepalive status and statistics.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit adds support to query keepalive status and statistics.

  $ ovs-appctl keepalive/status
keepAlive Status: Enabled

  $ ovs-appctl keepalive/pmd-health-show

  Keepalive status

keepalive status  : Enabled
keepalive interval: 1000 ms
PMD threads   : 4

 PMDCORESTATE   LAST SEEN TIMESTAMP(UTC)
pmd620  ALIVE   21 Aug 2017 16:29:31
pmd631  ALIVE   21 Aug 2017 16:29:31
pmd642  ALIVE   21 Aug 2017 16:29:31
pmd653  GONE21 Aug 2017 16:26:31

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c | 97 +
 1 file changed, 97 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 2497f00..119e351 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -22,10 +22,12 @@
 
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/vlog.h"
 #include "ovs-thread.h"
 #include "process.h"
 #include "timeval.h"
+#include "unixctl.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
 
@@ -295,6 +297,95 @@ ka_stats_run(void)
 return ka_stats;
 }
 
+static void
+ka_unixctl_status(struct unixctl_conn *conn, int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED, void *aux OVS_UNUSED)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+
+ds_put_format(, "keepAlive Status: %s",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
+static void
+ka_unixctl_pmd_health_show(struct unixctl_conn *conn, int argc OVS_UNUSED,
+   const char *argv[] OVS_UNUSED, void *ka_info_)
+{
+struct ds ds = DS_EMPTY_INITIALIZER;
+ds_put_format(,
+  "\n\t\tKeepalive status\n\n");
+
+ds_put_format(, "keepalive status  : %s\n",
+  ka_is_enabled() ? "Enabled" : "Disabled");
+
+if (!ka_is_enabled()) {
+goto out;
+}
+
+ds_put_format(, "keepalive interval: %"PRIu32" ms\n",
+  get_ka_interval());
+
+struct keepalive_info *ka_info = (struct keepalive_info *)ka_info_;
+if (OVS_UNLIKELY(!ka_info)) {
+goto out;
+}
+
+ds_put_format(, "PMD threads   : %"PRIu32" \n", ka_info->pmd_cnt);
+ds_put_format(,
+  "\n PMD\tCORE\tSTATE\tLAST SEEN TIMESTAMP(UTC)\n");
+
+struct ka_process_info *pinfo, *pinfo_next;
+
+ovs_mutex_lock(_info->proclist_mutex);
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+char *state = NULL;
+
+if (pinfo->core_state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+char *utc = xastrftime_msec("%d %b %Y %H:%M:%S",
+pinfo->core_last_seen_times, true);
+
+ds_put_format(, "%s\t%2d\t%s\t%s\n",
+  pinfo->name, pinfo->core_id, state, utc);
+
+free(utc);
+}
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ds_put_format(, "\n");
+out:
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 /* Dispatch heartbeats. */
 void
 dispatch_heartbeats(void)
@@ -412,6 +503,12 @@ ka_init(const struct smap *ovs_other_config)
 ka_init_status = ka_init_success;
 }
 
+unixctl_command_register("keepalive/status", "", 0, 0,
+  ka_unixctl_status, NULL);
+
+unixctl_command_register("keepalive/pmd-health-show", "", 0, 0,
+  ka_unixctl_pmd_health_show, ka_info);
+
 ovsthread_once_done(_enable);
 }
 }
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 5/7] keepalive: Retrieve PMD status periodically.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit implements APIs to retrieve the PMD thread status and return
the status in the below format for each PMD thread.

  Format: pmdid="status,core id,last_seen_timestamp(epoch)"
  eg: pmd62="ALIVE,2,150332575"
  pmd63="GONE,3,150332525"

The status is periodically retrieved by the keepalive thread and stored in
the keepalive_stats struct, which is later retrieved by the vswitchd thread.
In case of four PMD threads the status is as below:

   "pmd62"="ALIVE,0,150332575"
   "pmd63"="ALIVE,1,150332575"
   "pmd64"="ALIVE,2,150332575"
   "pmd65"="ALIVE,3,150332575"

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  1 +
 lib/keepalive.c   | 69 +++
 lib/keepalive.h   |  1 +
 3 files changed, 71 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 67ee424..8475a24 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -990,6 +990,7 @@ ovs_keepalive(void *f_)
 int n_pmds = cmap_count(>poll_threads) - 1;
 if (n_pmds > 0) {
 dispatch_heartbeats();
+get_ka_stats();
 }
 
 xusleep(get_ka_interval() * 1000);
diff --git a/lib/keepalive.c b/lib/keepalive.c
index 4ee89c0..9fd71b2 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -23,6 +23,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "ovs-thread.h"
 #include "process.h"
 #include "timeval.h"
 
@@ -33,6 +34,9 @@ static bool ka_init_status = ka_init_failure; /* Keepalive 
initialization */
 static uint32_t keepalive_timer_interval; /* keepalive timer interval */
 static struct keepalive_info *ka_info = NULL;
 
+static struct ovs_mutex mutex = OVS_MUTEX_INITIALIZER;
+static struct smap *keepalive_stats OVS_GUARDED_BY(mutex);
+
 inline bool
 ka_is_enabled(void)
 {
@@ -211,6 +215,71 @@ ka_get_timer_interval(const struct smap *ovs_other_config 
OVS_UNUSED)
 return ka_interval;
 }
 
+static void
+get_pmd_status(struct smap *ka_pmd_stats)
+OVS_REQUIRES(ka_info->proclist_mutex)
+{
+if (OVS_UNLIKELY(!ka_info)) {
+return;
+}
+
+struct ka_process_info *pinfo, *pinfo_next;
+HMAP_FOR_EACH_SAFE (pinfo, pinfo_next, node, _info->process_list) {
+int core_id = pinfo->core_id;
+char *state = NULL;
+if (pinfo->core_state == KA_STATE_UNUSED) {
+continue;
+}
+
+switch (pinfo->core_state) {
+case KA_STATE_ALIVE:
+state = "ALIVE";
+break;
+case KA_STATE_MISSING:
+state = "MISSING";
+break;
+case KA_STATE_DEAD:
+state = "DEAD";
+break;
+case KA_STATE_GONE:
+state = "GONE";
+break;
+case KA_STATE_DOZING:
+state = "DOZING";
+break;
+case KA_STATE_SLEEP:
+state = "SLEEP";
+break;
+case KA_STATE_UNUSED:
+break;
+}
+
+smap_add_format(ka_pmd_stats, pinfo->name, "%s,%d,%ld",
+state, core_id, pinfo->core_last_seen_times);
+}
+}
+
+void
+get_ka_stats(void)
+{
+struct smap *ka_pmd_stats;
+ka_pmd_stats = xmalloc(sizeof *ka_pmd_stats);
+smap_init(ka_pmd_stats);
+
+ovs_mutex_lock(_info->proclist_mutex);
+get_pmd_status(ka_pmd_stats);
+ovs_mutex_unlock(_info->proclist_mutex);
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+smap_destroy(keepalive_stats);
+free(keepalive_stats);
+keepalive_stats = NULL;
+}
+keepalive_stats = ka_pmd_stats;
+ovs_mutex_unlock();
+}
+
 /* Dispatch heartbeats. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index a344006..f5da460 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -100,6 +100,7 @@ uint32_t get_ka_interval(void);
 int get_ka_init_status(void);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
+void get_ka_stats(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 6/7] bridge: Update keepalive status in OVSDB.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit allows vswitchd thread to update the OVSDB with the
status of all registered PMD threads. The status can be monitored
using ovsdb-client and the sample output is below.

$ ovsdb-client monitor Open_vSwitch Open_vSwitch keepalive

rowaction keepalive
7b746190-ee71-4dcc-becf-f8cb9c7cb909 old  {
"pmd62"="ALIVE,0,9226457935188922"
"pmd63"="ALIVE,1,150678618"
"pmd64"="ALIVE,2,150678618"
"pmd65"="ALIVE,3,150678618"}

 new  {
"pmd62"="ALIVE,0,9226460230167364"
"pmd63"="ALIVE,1,150679619"
"pmd64"="ALIVE,2,150679619"
"pmd65"="ALIVE,3,150679619"}

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/keepalive.c   | 15 +++
 lib/keepalive.h   |  1 +
 vswitchd/bridge.c | 26 ++
 3 files changed, 42 insertions(+)

diff --git a/lib/keepalive.c b/lib/keepalive.c
index 9fd71b2..2497f00 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -280,6 +280,21 @@ get_ka_stats(void)
 ovs_mutex_unlock();
 }
 
+struct smap *
+ka_stats_run(void)
+{
+struct smap *ka_stats = NULL;
+
+ovs_mutex_lock();
+if (keepalive_stats) {
+ka_stats = keepalive_stats;
+keepalive_stats = NULL;
+}
+ovs_mutex_unlock();
+
+return ka_stats;
+}
+
 /* Dispatch heartbeats. */
 void
 dispatch_heartbeats(void)
diff --git a/lib/keepalive.h b/lib/keepalive.h
index f5da460..34f1c13 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -101,6 +101,7 @@ int get_ka_init_status(void);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 void get_ka_stats(void);
+struct smap *ka_stats_run(void);
 
 void dispatch_heartbeats(void);
 #endif /* keepalive.h */
diff --git a/vswitchd/bridge.c b/vswitchd/bridge.c
index aaa8d9b..9890c4f 100644
--- a/vswitchd/bridge.c
+++ b/vswitchd/bridge.c
@@ -286,6 +286,7 @@ static bool port_is_synthetic(const struct port *);
 
 static void reconfigure_system_stats(const struct ovsrec_open_vswitch *);
 static void run_system_stats(void);
+static void run_keepalive_stats(void);
 
 static void bridge_configure_mirrors(struct bridge *);
 static struct mirror *mirror_create(struct bridge *,
@@ -403,6 +404,7 @@ bridge_init(const char *remote)
 
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_cur_cfg);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_statistics);
+ovsdb_idl_omit_alert(idl, _open_vswitch_col_keepalive);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_datapath_types);
 ovsdb_idl_omit_alert(idl, _open_vswitch_col_iface_types);
 ovsdb_idl_omit(idl, _open_vswitch_col_external_ids);
@@ -2686,6 +2688,29 @@ run_system_stats(void)
 }
 }
 
+void
+run_keepalive_stats(void)
+{
+struct smap *ka_stats;
+const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(idl);
+
+ka_stats = ka_stats_run();
+if (ka_stats && cfg) {
+struct ovsdb_idl_txn *txn;
+struct ovsdb_datum datum;
+
+txn = ovsdb_idl_txn_create(idl);
+ovsdb_datum_from_smap(, ka_stats);
+smap_destroy(ka_stats);
+ovsdb_idl_txn_write(>header_, _open_vswitch_col_keepalive,
+);
+ovsdb_idl_txn_commit(txn);
+ovsdb_idl_txn_destroy(txn);
+
+free(ka_stats);
+}
+}
+
 static const char *
 ofp12_controller_role_to_str(enum ofp12_controller_role role)
 {
@@ -3033,6 +3058,7 @@ bridge_run(void)
 run_stats_update();
 run_status_update();
 run_system_stats();
+run_keepalive_stats();
 }
 
 void
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 4/7] dpif-netdev: Enable heartbeats for DPDK datapath.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit adds heartbeat mechanism support for DPDK datapath. Heartbeats
are sent to registered PMD threads at predefined intervals (as set in ovsdb
with 'keepalive-interval').

The heartbeats are only enabled when at least one port has been added to
the bridge and an active PMD thread is polling that port.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  9 -
 lib/keepalive.c   | 41 +
 lib/keepalive.h   |  1 +
 3 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 84c7ffd..67ee424 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -980,11 +980,18 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 }
 
 static void *
-ovs_keepalive(void *f_ OVS_UNUSED)
+ovs_keepalive(void *f_)
 {
+struct dp_netdev *dp = f_;
+
 pthread_detach(pthread_self());
 
 for (;;) {
+int n_pmds = cmap_count(>poll_threads) - 1;
+if (n_pmds > 0) {
+dispatch_heartbeats();
+}
+
 xusleep(get_ka_interval() * 1000);
 }
 
diff --git a/lib/keepalive.c b/lib/keepalive.c
index dfafaeb..4ee89c0 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -211,6 +211,47 @@ ka_get_timer_interval(const struct smap *ovs_other_config 
OVS_UNUSED)
 return ka_interval;
 }
 
+/* Dispatch heartbeats. */
+void
+dispatch_heartbeats(void)
+{
+for (int core_id = 0; core_id < KA_DP_MAXCORES; core_id++) {
+if (ka_info->active_cores[core_id] == 0) {
+continue;
+}
+
+switch (ka_info->state_flags[core_id]) {
+case KA_STATE_UNUSED:
+break;
+case KA_STATE_ALIVE: /* Alive */
+ka_info->state_flags[core_id] = KA_STATE_MISSING;
+ka_info->last_alive[core_id] = time_wall_msec();
+break;
+case KA_STATE_MISSING: /* MIA */
+ka_info->state_flags[core_id] = KA_STATE_DEAD;
+break;
+case KA_STATE_DEAD: /* Dead */
+ka_info->state_flags[core_id] = KA_STATE_GONE;
+VLOG_DBG("Core %d died. ", core_id);
+break;
+case KA_STATE_GONE: /* Buried */
+break;
+case KA_STATE_DOZING: /* Core going idle */
+ka_info->state_flags[core_id] = KA_STATE_SLEEP;
+ka_info->last_alive[core_id] = time_wall_msec();
+break;
+case KA_STATE_SLEEP: /* Idled core */
+break;
+}
+
+if (ka_info->relay_cb) {
+ka_info->relay_cb(core_id, ka_info->state_flags[core_id],
+  ka_info->last_alive[core_id],
+  ka_info->relay_cb_data);
+}
+}
+}
+
 /*
  * This function shall be invoked periodically to write the core status and
  * last seen timestamp of the cores in to keepalive info structure.
diff --git a/lib/keepalive.h b/lib/keepalive.h
index 607ee3b..a344006 100644
--- a/lib/keepalive.h
+++ b/lib/keepalive.h
@@ -101,4 +101,5 @@ int get_ka_init_status(void);
 int ka_alloc_portstats(unsigned, int);
 void ka_destroy_portstats(void);
 
+void dispatch_heartbeats(void);
 #endif /* keepalive.h */
-- 
2.4.11

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v4 3/7] dpif-netdev: Register packet processing cores to KA framework.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit registers the packet processing PMD cores to keepalive
framework. Only PMDs that have rxqs mapped will be registered and
actively monitored by KA framework.

This commit spawns a keepalive thread that will dispatch heartbeats to
PMD cores. The pmd threads respond to heartbeats by marking themselves
alive. As long as PMD responds to heartbeats it is considered 'healthy'.

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/dpif-netdev.c |  70 +
 lib/keepalive.c   | 153 ++
 lib/keepalive.h   |  17 ++
 lib/util.c|  23 
 lib/util.h|   2 +
 5 files changed, 254 insertions(+), 11 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e2cd931..84c7ffd 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -49,6 +49,7 @@
 #include "flow.h"
 #include "hmapx.h"
 #include "id-pool.h"
+#include "keepalive.h"
 #include "latch.h"
 #include "netdev.h"
 #include "netdev-vport.h"
@@ -978,6 +979,63 @@ sorted_poll_thread_list(struct dp_netdev *dp,
 *n = k;
 }
 
+static void *
+ovs_keepalive(void *f_ OVS_UNUSED)
+{
+pthread_detach(pthread_self());
+
+for (;;) {
+xusleep(get_ka_interval() * 1000);
+}
+
+return NULL;
+}
+
+static void
+ka_thread_start(struct dp_netdev *dp)
+{
+static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
+
+if (ovsthread_once_start()) {
+ovs_thread_create("ovs_keepalive", ovs_keepalive, dp);
+
+ovsthread_once_done();
+}
+}
+
+static void
+ka_register_datapath_threads(struct dp_netdev *dp)
+{
+int ka_init = get_ka_init_status();
+VLOG_DBG("Keepalive: Was initialization successful? [%s]",
+ka_init ? "Success" : "Failure");
+if (!ka_init) {
+return;
+}
+
+ka_thread_start(dp);
+
+struct dp_netdev_pmd_thread *pmd;
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+/*  Register only PMD threads. */
+if (pmd->core_id != NON_PMD_CORE_ID) {
+int tid = ka_get_pmd_tid(pmd->core_id);
+
+/* Skip PMD thread with no rxqs mapping. */
+if (OVS_UNLIKELY(!hmap_count(>poll_list))) {
+/* rxq mapping changes due to reconfiguration,
+ * if there are no rxqs mapped to PMD, unregister it. */
+ka_unregister_thread(tid, true);
+continue;
+}
+
+ka_register_thread(tid, true);
+VLOG_INFO("Registered PMD thread [%d] on Core [%d] to KA 
framework",
+  tid, pmd->core_id);
+}
+}
+}
+
 static void
 dpif_netdev_pmd_info(struct unixctl_conn *conn, int argc, const char *argv[],
  void *aux)
@@ -3625,6 +3683,9 @@ reconfigure_datapath(struct dp_netdev *dp)
 
 /* Reload affected pmd threads. */
 reload_affected_pmds(dp);
+
+/* Register datapath threads to KA monitoring. */
+ka_register_datapath_threads(dp);
 }
 
 /* Returns true if one of the netdevs in 'dp' requires a reconfiguration */
@@ -3824,6 +3885,8 @@ pmd_thread_main(void *f_)
 
 poll_list = NULL;
 
+ka_store_pmd_id(pmd->core_id);
+
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
 ovs_numa_thread_setaffinity_core(pmd->core_id);
@@ -3859,6 +3922,9 @@ reload:
   : PMD_CYCLES_IDLE);
 }
 
+/* Mark PMD thread alive. */
+ka_mark_pmd_thread_alive(pmd->core_id);
+
 if (lc++ > 1024) {
 bool reload;
 
@@ -3892,6 +3958,10 @@ reload:
 }
 
 emc_cache_uninit(>flow_cache);
+
+int tid = ka_get_pmd_tid(pmd->core_id);
+ka_unregister_thread(tid, true);
+
 free(poll_list);
 pmd_free_cached_ports(pmd);
 return NULL;
diff --git a/lib/keepalive.c b/lib/keepalive.c
index ac73a42..dfafaeb 100644
--- a/lib/keepalive.c
+++ b/lib/keepalive.c
@@ -23,6 +23,7 @@
 #include "keepalive.h"
 #include "lib/vswitch-idl.h"
 #include "openvswitch/vlog.h"
+#include "process.h"
 #include "timeval.h"
 
 VLOG_DEFINE_THIS_MODULE(keepalive);
@@ -48,6 +49,134 @@ ka_get_pmd_tid(unsigned core_idx)
 return -EINVAL;
 }
 
+/* Accessor: reports the keepalive timer interval currently held in
+ * 'keepalive_timer_interval'. */
+inline uint32_t
+get_ka_interval(void)
+{
+    const uint32_t interval = keepalive_timer_interval;
+
+    return interval;
+}
+
+/* Reports the keepalive initialization status recorded in
+ * 'ka_init_status'. */
+int
+get_ka_init_status(void)
+{
+    int status = ka_init_status;
+
+    return status;
+}
+
+/* Stores the calling thread's id, as returned by gettid(), in
+ * 'ka_info->thread_id[core_idx]'.  Nothing is stored while keepalive is
+ * disabled.  Asserts that the thread id is positive before storing it. */
+void
+ka_store_pmd_id(unsigned core_idx)
+{
+    int tid = gettid();
+
+    if (!ka_is_enabled()) {
+        return;
+    }
+
+    ovs_assert(tid > 0);
+    ka_info->thread_id[core_idx] = tid;
+}
+
+/* Register thread to KA framework. */
+void
+ka_register_thread(int tid, bool thread_is_pmd)
+{
+if (ka_is_enabled()) {

[ovs-dev] [PATCH v4 2/7] Keepalive: Add initial keepalive support.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit introduces initial keepalive support by adding the
'keepalive' module, along with helper and initialization functions
that will be invoked by later commits.

This commit adds a new OVSDB column, "keepalive", that shows the status
of the datapath threads. It is implemented for the DPDK datapath, and
only the status of PMD threads is reported.

For example:
  To enable the keepalive feature:
  'ovs-vsctl --no-wait set Open_vSwitch . other_config:enable-keepalive=true'

  To set a timer interval of 5000ms for monitoring packet processing cores:
  'ovs-vsctl --no-wait set Open_vSwitch . \
 other_config:keepalive-interval="5000"'

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/automake.mk|   2 +
 lib/keepalive.c| 183 +
 lib/keepalive.h|  87 +
 vswitchd/bridge.c  |   3 +
 vswitchd/vswitch.ovsschema |   8 +-
 vswitchd/vswitch.xml   |  49 
 6 files changed, 330 insertions(+), 2 deletions(-)
 create mode 100644 lib/keepalive.c
 create mode 100644 lib/keepalive.h

diff --git a/lib/automake.mk b/lib/automake.mk
index 2415f4c..0d99f0a 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -110,6 +110,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/json.c \
lib/jsonrpc.c \
lib/jsonrpc.h \
+   lib/keepalive.c \
+   lib/keepalive.h \
lib/lacp.c \
lib/lacp.h \
lib/latch.h \
diff --git a/lib/keepalive.c b/lib/keepalive.c
new file mode 100644
index 000..ac73a42
--- /dev/null
+++ b/lib/keepalive.c
@@ -0,0 +1,183 @@
+/*
+ * Copyright (c) 2014, 2015, 2016, 2017 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "keepalive.h"
+#include "lib/vswitch-idl.h"
+#include "openvswitch/vlog.h"
+#include "timeval.h"
+
+VLOG_DEFINE_THIS_MODULE(keepalive);
+
+static bool keepalive_enable = false;/* Keepalive feature switch; off by default. */
+static bool ka_init_status = ka_init_failure; /* Keepalive initialization status. */
+static uint32_t keepalive_timer_interval; /* Keepalive timer interval (presumably ms). */
+static struct keepalive_info *ka_info = NULL; /* Keepalive state; starts out NULL. */
+
+/* Tells whether the keepalive feature is currently switched on, as
+ * recorded in 'keepalive_enable'. */
+inline bool
+ka_is_enabled(void)
+{
+    bool enabled = keepalive_enable;
+
+    return enabled;
+}
+
+/* Looks up the thread id stored for 'core_idx' in 'ka_info->thread_id'.
+ * Yields -EINVAL when keepalive is disabled. */
+inline int
+ka_get_pmd_tid(unsigned core_idx)
+{
+    return ka_is_enabled() ? ka_info->thread_id[core_idx] : -EINVAL;
+}
+
+/* Records 'state' and the 'last_alive' timestamp for the PMD thread that
+ * is registered for 'core_id'.
+ *
+ * The thread id stored for 'core_id' is used as the hash key into
+ * 'ka_info->process_list'; every entry in that bucket whose core id and
+ * thread id both match is updated while 'ka_info->proclist_mutex' is held.
+ *
+ * Does nothing when no thread id is available for 'core_id', e.g. when
+ * keepalive is disabled and ka_get_pmd_tid() returns -EINVAL.  In that
+ * case 'ka_info' may still be NULL and must not be dereferenced. */
+void
+ka_set_pmd_state_ts(unsigned core_id, enum keepalive_state state,
+                    uint64_t last_alive)
+{
+    struct ka_process_info *pinfo;
+    int tid = ka_get_pmd_tid(core_id);
+
+    if (tid < 0 || !ka_info) {
+        return;
+    }
+
+    ovs_mutex_lock(&ka_info->proclist_mutex);
+    HMAP_FOR_EACH_WITH_HASH (pinfo, node, hash_int(tid, 0),
+                             &ka_info->process_list) {
+        if ((pinfo->core_id == core_id) && (pinfo->tid == tid)) {
+            pinfo->core_state = state;
+            pinfo->core_last_seen_times = last_alive;
+        }
+    }
+    ovs_mutex_unlock(&ka_info->proclist_mutex);
+}
+
+/* Retrieves the keepalive timer interval, in milliseconds, from the
+ * "keepalive-interval" key of 'ovs_other_config'.
+ *
+ * Returns OVS_KEEPALIVE_TIMEOUT when the key is not set.  A zero or
+ * negative setting is also replaced by OVS_KEEPALIVE_TIMEOUT, because
+ * smap_get_int() yields a signed value that would otherwise wrap to a
+ * huge interval when stored in the unsigned return type. */
+static uint32_t
+ka_get_timer_interval(const struct smap *ovs_other_config)
+{
+#define OVS_KEEPALIVE_TIMEOUT 1000    /* Default timeout set to 1000ms */
+    uint32_t ka_interval;
+    int configured;
+
+    /* Timer granularity in milliseconds
+     * Defaults to OVS_KEEPALIVE_TIMEOUT(ms) if not set */
+    configured = smap_get_int(ovs_other_config, "keepalive-interval",
+                              OVS_KEEPALIVE_TIMEOUT);
+    if (configured <= 0) {
+        VLOG_WARN("Invalid keepalive-interval %d, using default %d (ms)",
+                  configured, OVS_KEEPALIVE_TIMEOUT);
+        configured = OVS_KEEPALIVE_TIMEOUT;
+    }
+    ka_interval = configured;
+
+    VLOG_INFO("Keepalive timer interval set to %"PRIu32" (ms)\n", ka_interval);
+    return ka_interval;
+}
+
+/*
+ * This function shall be invoked periodically to write the core status and
+ * last seen timestamp of the cores in to keepalive info structure.
+ */
+void
+ka_update_core_state(const int core_id, const enum keepalive_state core_state,
+ uint64_t last_alive, void *ptr_data OVS_UNUSED)
+{
+switch (core_state) {
+case KA_STATE_ALIVE:
+case KA_STATE_MISSING:
+ka_set_pmd_state_ts(core_id, KA_STATE_ALIVE, last_alive);
+break;
+case KA_STATE_UNUSED:
+case KA_STATE_DOZING:
+case KA_STATE_SLEEP:
+case KA_STATE_DEAD:
+case KA_STATE_GONE:
+ka_set_pmd_state_ts(core_id, core_state, last_alive);
+break

[ovs-dev] [PATCH v4 1/7] process: Extend get_process_info() for additional fields.

2017-08-22 Thread Bhanuprakash Bodireddy
This commit enables the fields relating to the process name and the core
number the process was last scheduled on. The APIs will be used by the
keepalive monitoring framework in future commits.

This commit also fixes the following "sparse" warning:

  lib/process.c:439:16: error: use of assignment suppression and length
  modifier together in gnu_scanf format [-Werror=format=].

Signed-off-by: Bhanuprakash Bodireddy <bhanuprakash.bodire...@intel.com>
---
 lib/process.c | 43 +++
 lib/process.h |  2 ++
 2 files changed, 25 insertions(+), 20 deletions(-)

diff --git a/lib/process.c b/lib/process.c
index 3e119b5..95df112 100644
--- a/lib/process.c
+++ b/lib/process.c
@@ -64,7 +64,8 @@ struct raw_process_info {
 long long int uptime;   /* ms since started. */
 long long int cputime;  /* ms of CPU used during 'uptime'. */
 pid_t ppid; /* Parent. */
-char name[18];  /* Name (surrounded by parentheses). */
+int core_id;/* Core id last executed on. */
+char name[18];  /* Name. */
 };
 
 /* Pipe used to signal child termination. */
@@ -421,7 +422,7 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
 
 n = fscanf(stream,
"%*d "   /* (1. pid) */
-   "%17s "  /* 2. process name */
+   "(%17[^)]) " /* 2. process name */
"%*c "   /* (3. state) */
"%lu "   /* 4. ppid */
"%*d "   /* (5. pgid) */
@@ -444,33 +445,34 @@ get_raw_process_info(pid_t pid, struct raw_process_info 
*raw)
"%llu "  /* 22. start_time */
"%llu "  /* 23. vsize */
"%llu "  /* 24. rss */
+   "%*u "   /* (25. rsslim) */
+   "%*u "   /* (26. start_code) */
+   "%*u "   /* (27. end_code) */
+   "%*u "   /* (28. start_stack) */
+   "%*u "   /* (29. esp) */
+   "%*u "   /* (30. eip) */
+   "%*u "   /* (31. pending signals) */
+   "%*u "   /* (32. blocked signals) */
+   "%*u "   /* (33. ignored signals) */
+   "%*u "   /* (34. caught signals) */
+   "%*u "   /* (35. whcan) */
+   "%*u "   /* (36. always 0) */
+   "%*u "   /* (37. always 0) */
+   "%*d "   /* (38. exit_signal) */
+   "%d "/* 39. task_cpu */
 #if 0
/* These are here for documentation but #if'd out to save
 * actually parsing them from the stream for no benefit. */
-   "%*lu "  /* (25. rsslim) */
-   "%*lu "  /* (26. start_code) */
-   "%*lu "  /* (27. end_code) */
-   "%*lu "  /* (28. start_stack) */
-   "%*lu "  /* (29. esp) */
-   "%*lu "  /* (30. eip) */
-   "%*lu "  /* (31. pending signals) */
-   "%*lu "  /* (32. blocked signals) */
-   "%*lu "  /* (33. ignored signals) */
-   "%*lu "  /* (34. caught signals) */
-   "%*lu "  /* (35. whcan) */
-   "%*lu "  /* (36. always 0) */
-   "%*lu "  /* (37. always 0) */
-   "%*d "   /* (38. exit_signal) */
-   "%*d "   /* (39. task_cpu) */
"%*u "   /* (40. rt_priority) */
"%*u "   /* (41. policy) */
"%*llu " /* (42. blkio_ticks) */
"%*lu "  /* (43. gtime) */
"%*ld"   /* (44. cgtime) */
 #endif
-   , raw->name, , , , _time, , );
+   , raw->name, , , , _time,
+  , , >core_id);
 fclose(stream);
-if (n != 7) {
+if (n != 8) {
 VLOG_ERR_ONCE("%s: fscanf failed", file_name);
 return false;
 }
@@ -496,12 +498,14 @@ get_process_info(pid_t pid, struct process_info *pinfo)
 return false;
 }
 
+ovs_strlcpy(pinfo->name, child.name, sizeof pinfo->name);
 pinfo->vsz = child.vsz;
 pinfo->rss = child.rss;
 pinfo->booted = child.uptime;
 pinfo->crashes = 0;
 pinfo->uptime = child.uptime;
   

[ovs-dev] [PATCH v4 0/7] Add OVS DPDK keep-alive functionality.

2017-08-22 Thread Bhanuprakash Bodireddy
The keepalive feature is aimed at achieving Fastpath Service Assurance
in OVS-DPDK deployments. It adds support for monitoring the packet
processing cores (PMD thread cores) by dispatching heartbeats at regular
intervals. In case of heartbeat misses, additional health checks are
enabled on the PMD thread to detect the failure, and the failure is
reported to higher-level fault management systems/frameworks.

The implementation uses OVSDB for reporting the health of the PMD threads.
Any external monitoring application can read the status from OVSDB at
regular intervals, or subscribe to updates in OVSDB so that it is
notified when changes happen.

A keepalive info struct is created and initialized for storing the
status of the PMD threads. It is initialized by the main thread (vswitchd)
as part of the init process and is periodically updated by the 'keepalive'
thread. The keepalive feature can be enabled through the OVSDB settings below.

enable-keepalive=true
  - Keepalive feature is disabled by default.

keepalive-interval="5000"
  - Timer interval in milliseconds for monitoring the packet
processing cores.

When KA is enabled, an 'ovs-keepalive' thread is spawned that wakes
up at regular intervals to update the timestamp and status of the PMD cores
in the keepalive info struct. The vswitchd thread reads this information
and writes the status into the 'keepalive' column of the Open_vSwitch table
in OVSDB.

An external monitoring framework such as collectd, with OVS events support,
can read or subscribe to the datapath status changes in OVSDB. When the state
is updated, collectd is notified and eventually relays the status
to the ceilometer service running on the controller. Below is a high-level
overview of the deployment model.

    Compute Node                Controller               Compute Node

    Collectd  <-------------->  Ceilometer  <---------->   Collectd

    OvS DPDK                                               OvS DPDK

     +-----+
     | VM  |
     +--+--+
    \---+---/
        |
     +--+---+       +-------------------+       +--------------+
     | OVS  | ----> | ovsevents plugin  | ----> |   collectd   |
     +--+---+       +-------------------+       +------+-------+
                                                       |
  +------------+     +----------------------------+    |
  | Ceilometer | <-- | collectd ceilometer plugin | <--+
  +------------+     +----------------------------+

github: The patches can be found here:
  https://github.com/bbodired/ovs (Last master commit e7cd8c363)

Performance impact:
  No noticeable performance or latency impact is observed with
  KA feature enabled.

-
v3 -> v4
  * Split the functionality into 2 parts. This patch series only updates
PMD status to OVSDB. An incremental patch series will handle false positives,
false negatives, and additional checks and stats.
  * Remove code from netdev layer and dependency on rte_keepalive lib.
  * Merged few patches and simplified the patch series.
  * Timestamp in human readable form.

v2 -> v3
  * Rebase.
  * Verified with dpdk-stable-17.05.1 release.
  * Fixed build issues with MSVC and cross checked with appveyor.

v1 -> v2
  * Rebase
  * Drop 01/20 Patch "Consolidate process related APIs" of V1 as it
is already applied as separate patch.

RFCv3 -> v1
  * Made changes to fix failures in some unit test cases.
  * some more code cleanup w.r.t process related APIs.

RFCv2 -> RFCv3
  * Remove POSIX shared memory block implementation (suggested by Aaron).
  * Rework the logic to register and track threads instead of cores. This way
in the future any thread can be registered to KA framework. For now only PMD
threads are tracked (suggested by Aaron).
  * Refactor few APIs and further clean up the code.
   
RFCv1 -> RFCv2
  * Merged the xml and schema commits to later commit where the actual
implementation is done(suggested by Ben).
  * Fix ovs-appctl keepalive/* hang issue when KA disabled.
  * Fixed memory leaks with appctl commands for keepalive/pmd-health-show,
pmd-xstats-show.
  * Refactor code and fixed APIs dealing with PMD health monitoring.


Bhanuprakash Bodireddy (7):
  process: Extend get_process_info() for additional fields.
  Keepalive: Add initial keepalive support.
  dpif-netdev: Register packet processing cores to KA framework.
  dpif-netdev: Enable heartbeats for DPDK datapath.
  keepalive: Retrieve PMD status periodically.
  bridge: Update keepalive status in OVSDB.
  keepalive: Add support to query keepalive status and statistics.

 lib/automake.mk|   2 +
 lib/dpif-netdev.c  |  78 +++
 lib/keepalive.c| 536 +
 lib/keepalive.h| 107 +
 lib/process.c  |  43 ++--
 lib/process.h  |   2 +
 lib/util.c |  23 ++
 lib/util.h |   2 +
 vswitchd/bridge.c  |  29 

  1   2   3   >