Re: [ovs-dev] [PATCH] datapath-windows: Account for VLAN tag in tunnel Decap

2017-11-27 Thread Shashank Ram
Alin, can we apply this patch? It’s been sitting around in the mailing list for 
a week without any reviews after an ACK.

-- 
Thanks,
Shashank

On 11/21/17, 11:18 PM, "Anand Kumar"  wrote:

Acked-by: Anand Kumar 

Thanks,
Anand Kumar

On 11/20/17, 3:06 PM, "ovs-dev-boun...@openvswitch.org on behalf of 
Shashank Ram"  
wrote:

Decap functions for tunneling protocols do not compute
the packet header offsets correctly when there is a VLAN
tag in the L2 header. This results in incorrect checksum
computation causing the packet to be dropped.

This patch adds support to account for the VLAN tag in the
packet if its present, and makes use of the OvsExtractLayers()
function to correctly compute the header offsets for different
layers.

Testing done:
- Tested Geneve, STT, Vxlan and Gre and verified that there
  are no regressions.
- Verified that packets with VLAN tags are correctly handled
  in the decap code of all tunneling protocols. Previously,
  this would result in packet drops due to invalid checksums
  being computed.
- Verified that non-VLAN tagged packets are handled correctly.

Signed-off-by: Shashank Ram 
---
 datapath-windows/ovsext/Geneve.c  | 14 +
 datapath-windows/ovsext/Geneve.h  |  6 ++
 datapath-windows/ovsext/Gre.c | 29 --
 datapath-windows/ovsext/Gre.h | 16 ++
 datapath-windows/ovsext/Offload.c | 10 +
 datapath-windows/ovsext/Offload.h |  3 ++-
 datapath-windows/ovsext/Stt.c | 44 
+++
 datapath-windows/ovsext/Stt.h |  6 ++
 datapath-windows/ovsext/Vxlan.c   | 14 +
 datapath-windows/ovsext/Vxlan.h   |  6 ++
 10 files changed, 111 insertions(+), 37 deletions(-)

diff --git a/datapath-windows/ovsext/Geneve.c 
b/datapath-windows/ovsext/Geneve.c
index 6dca69b..210716d 100644
--- a/datapath-windows/ovsext/Geneve.c
+++ b/datapath-windows/ovsext/Geneve.c
@@ -262,10 +262,16 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT 
switchContext,
 PUINT8 bufferStart;
 PVOID optStart;
 NDIS_STATUS status;
+OVS_PACKET_HDR_INFO layers = { 0 };
+
+status = OvsExtractLayers(curNbl, &layers);
+if (status != NDIS_STATUS_SUCCESS) {
+return status;
+}

 /* Check the length of the UDP payload */
 curNb = NET_BUFFER_LIST_FIRST_NB(curNbl);
-tunnelSize = OvsGetGeneveTunHdrMinSize();
+tunnelSize = OvsGetGeneveTunHdrSizeFromLayers(&layers);
 packetLength = NET_BUFFER_DATA_LENGTH(curNb);
 if (packetLength <= tunnelSize) {
 return NDIS_STATUS_INVALID_LENGTH;
@@ -295,13 +301,13 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT 
switchContext,

 ethHdr = (EthHdr *)bufferStart;
 /* XXX: Handle IP options. */
-ipHdr = (IPHdr *)((PCHAR)ethHdr + sizeof *ethHdr);
+ipHdr = (IPHdr *)(bufferStart + layers.l3Offset);
 tunKey->src = ipHdr->saddr;
 tunKey->dst = ipHdr->daddr;
 tunKey->tos = ipHdr->tos;
 tunKey->ttl = ipHdr->ttl;
 tunKey->pad = 0;
-udpHdr = (UDPHdr *)((PCHAR)ipHdr + sizeof *ipHdr);
+udpHdr = (UDPHdr *)(bufferStart + layers.l4Offset);

 /* Validate if NIC has indicated checksum failure. */
 status = OvsValidateUDPChecksum(curNbl, udpHdr->check == 0);
@@ -312,7 +318,7 @@ NDIS_STATUS OvsDecapGeneve(POVS_SWITCH_CONTEXT 
switchContext,
 /* Calculate and verify UDP checksum if NIC didn't do it. */
 if (udpHdr->check != 0) {
 status = OvsCalculateUDPChecksum(curNbl, curNb, ipHdr, udpHdr,
- packetLength);
+ packetLength, &layers);
 tunKey->flags |= OVS_TNL_F_CSUM;
 if (status != NDIS_STATUS_SUCCESS) {
 goto dropNbl;
diff --git a/datapath-windows/ovsext/Geneve.h 
b/datapath-windows/ovsext/Geneve.h
index 019c0dd..db758dd 100644
--- a/datapath-windows/ovsext/Geneve.h
+++ b/datapath-windows/ovsext/Geneve.h
@@ -113,6 +113,12 @@ OvsGetGeneveTunHdrMaxSize(VOID)
 return OvsGetGeneveTunHdrMinSize() + TUN_OPT_MAX_LEN;
 }

+static __inline UINT32
+OvsGetGeneveTunHdrSizeFromLayers(POVS_PACKET_HDR_INFO layers)
+{
+return layers->l7Offset + sizeof(GeneveHdr);
+}
+
 #define GENEVE_UDP_PORT 6081
 

Re: [ovs-dev] [PATCH] openvswitch: use ktime_get_ts64() instead of ktime_get_ts()

2017-11-27 Thread Pravin Shelar
On Mon, Nov 27, 2017 at 5:11 PM, Arnd Bergmann  wrote:
> timespec is deprecated because of the y2038 overflow, so let's convert
> this one to ktime_get_ts64(). The code is already safe even on 32-bit
> architectures, since it uses monotonic times. On 64-bit architectures,
> nothing changes, while on 32-bit architectures this avoids one
> type conversion.
>
> Signed-off-by: Arnd Bergmann 
> ---
>  net/openvswitch/flow.c | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
> index dbe2379329c5..76d050aba7a4 100644
> --- a/net/openvswitch/flow.c
> +++ b/net/openvswitch/flow.c
> @@ -56,12 +56,12 @@
>
>  u64 ovs_flow_used_time(unsigned long flow_jiffies)
>  {
> -   struct timespec cur_ts;
> +   struct timespec64 cur_ts;
> u64 cur_ms, idle_ms;
>
> -   ktime_get_ts(&cur_ts);
> +   ktime_get_ts64(&cur_ts);
> idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
> -   cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
> +   cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +

I am not sure why is tv_sec converted to u32.

>  cur_ts.tv_nsec / NSEC_PER_MSEC;
>
> return cur_ms - idle_ms;
> --
> 2.9.0
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [patch v2 0/3] conntrack: Alg improvements.

2017-11-27 Thread Aaron Conole
Darrell Ball  writes:

> Some refactoring of alg support is done.
> Also algs are disabled by default unless an alg specifier
> is supplied; this allows for enhanced security and matches
> later kernels.
> Another change to allow for non-standard alg conntrol
> port specification.
>  
>
> Darrell Ball (3):
>   conntrack: Refactor algs.
>   conntrack: Allow specified alg port numbers.
>   conntrack: Disable algs by default.
>
>  lib/conntrack.c| 168 
> ++---
>  lib/conntrack.h|   8 +--
>  lib/dpif-netdev.c  |   4 +-
>  tests/test-conntrack.c |   6 +-
>  4 files changed, 127 insertions(+), 59 deletions(-)

For the series -

Acked-by: Aaron Conole 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Update mailing list archive pointers to the current server.

2017-11-27 Thread Justin Pettit

> On Nov 27, 2017, at 2:57 PM, Ben Pfaff  wrote:
> 
> On Mon, Nov 27, 2017 at 02:28:37PM -0800, Justin Pettit wrote:
>> Signed-off-by: Justin Pettit 
> 
> If you checked that the new URLs are correct (I did not), then:
> Acked-by: Ben Pfaff 

Thanks.  I pushed this to master.  I'll wait a day or two for the other.

--Justin


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Update mailing list archive pointers to the current server.

2017-11-27 Thread Ben Pfaff
On Mon, Nov 27, 2017 at 02:28:37PM -0800, Justin Pettit wrote:
> Signed-off-by: Justin Pettit 

If you checked that the new URLs are correct (I did not), then:
Acked-by: Ben Pfaff 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] ovn/TODO: Remove some completed items.

2017-11-27 Thread Ben Pfaff
On Mon, Nov 27, 2017 at 02:28:38PM -0800, Justin Pettit wrote:
> Signed-off-by: Justin Pettit 

I think that these changes are correct.  You might want to wait a day or
so to see if anyone else speaks up.

Acked-by: Ben Pfaff 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor dp_netdev_pmd_thread structure."

2017-11-27 Thread Ben Pfaff
On Mon, Nov 27, 2017 at 02:59:29PM +0300, Ilya Maximets wrote:
> > I also verified the other case when posix_memalign isn't available and even 
> > in that case
> > it returns the address aligned on CACHE_LINE_SIZE boundary. I will send out 
> > a patch to use
> >  xzalloc_cacheline for allocating the memory.
> 
> I don't know how you tested this, because it is impossible:
> 
>   1. OVS allocates some memory:
>   base = xmalloc(...);
> 
>   2. Rounds it up to the cache line start:
>   payload = (void **) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
> 
>   3. Returns the pointer increased by 8 bytes:
>   return (char *) payload + MEM_ALIGN;
> 
> So, unless you redefined MEM_ALIGN to zero, you will never get aligned memory
> address while allocating by xmalloc_cacheline() on system without 
> posix_memalign().

Maybe we should make the non-HAVE_POSIX_MEMALIGN case better.  Something
like this (compile tested only);

diff --git a/lib/util.c b/lib/util.c
index 9e6edd27ae4c..137091a3cd4f 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -196,15 +196,9 @@ x2nrealloc(void *p, size_t *n, size_t s)
 return xrealloc(p, *n * s);
 }
 
-/* The desired minimum alignment for an allocated block of memory. */
-#define MEM_ALIGN MAX(sizeof(void *), 8)
-BUILD_ASSERT_DECL(IS_POW2(MEM_ALIGN));
-BUILD_ASSERT_DECL(CACHE_LINE_SIZE >= MEM_ALIGN);
-
-/* Allocates and returns 'size' bytes of memory in dedicated cache lines.  That
- * is, the memory block returned will not share a cache line with other data,
- * avoiding "false sharing".  (The memory returned will not be at the start of
- * a cache line, though, so don't assume such alignment.)
+/* Allocates and returns 'size' bytes of memory aligned to a cache line and in
+ * dedicated cache lines.  That is, the memory block returned will not share a
+ * cache line with other data, avoiding "false sharing".
  *
  * Use free_cacheline() to free the returned memory block. */
 void *
@@ -221,28 +215,37 @@ xmalloc_cacheline(size_t size)
 }
 return p;
 #else
-void **payload;
-void *base;
-
 /* Allocate room for:
  *
- * - Up to CACHE_LINE_SIZE - 1 bytes before the payload, so that the
- *   start of the payload doesn't potentially share a cache line.
+ * - Header padding: Up to CACHE_LINE_SIZE - 1 bytes, to allow the
+ *   pointer to be aligned exactly sizeof(void *) bytes before the
+ *   beginning of a cache line.
  *
- * - A payload consisting of a void *, followed by padding out to
- *   MEM_ALIGN bytes, followed by 'size' bytes of user data.
+ * - Pointer: A pointer to the start of the header padding, to allow us
+ *   to free() the block later.
  *
- * - Space following the payload up to the end of the cache line, so
- *   that the end of the payload doesn't potentially share a cache line
- *   with some following block. */
-base = xmalloc((CACHE_LINE_SIZE - 1)
-   + ROUND_UP(MEM_ALIGN + size, CACHE_LINE_SIZE));
-
-/* Locate the payload and store a pointer to the base at the beginning. */
-payload = (void **) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);
-*payload = base;
-
-return (char *) payload + MEM_ALIGN;
+ * - User data: 'size' bytes.
+ *
+ * - Trailer padding: Enough to bring the user data up to a cache line
+ *   multiple.
+ *
+ * +---+-++-+
+ * | header| pointer | user data  | trailer |
+ * +---+-++-+
+ * ^   ^ ^
+ * |   | |
+ * p   q r
+ *
+ */
+void *p = xmalloc((CACHE_LINE_SIZE - 1)
+  + sizeof(void *)
+  + ROUND_UP(size, CACHE_LINE_SIZE));
+bool runt = PAD_SIZE((uintptr_t) p, CACHE_LINE_SIZE) < sizeof(void *);
+void *r = (void *) ROUND_UP((uintptr_t) p + (runt ? CACHE_LINE_SIZE : 0),
+CACHE_LINE_SIZE);
+void **q = (void **) r - 1;
+*q = p;
+return r;
 #endif
 }
 
@@ -265,7 +268,8 @@ free_cacheline(void *p)
 free(p);
 #else
 if (p) {
-free(*(void **) ((uintptr_t) p - MEM_ALIGN));
+void **q = (void **) p - 1;
+free(*q);
 }
 #endif
 }
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] ovn/TODO: Remove some completed items.

2017-11-27 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 ovn/TODO.rst | 69 
 1 file changed, 69 deletions(-)

diff --git a/ovn/TODO.rst b/ovn/TODO.rst
index f8569b357281..34615b9bc7d4 100644
--- a/ovn/TODO.rst
+++ b/ovn/TODO.rst
@@ -27,22 +27,9 @@ OVN To-do List
 
 * Work out database for clustering or HA properly.
 
-* Compromised chassis mitigation.
-
-  Possibly depends on database solution.
-
-  Latest discussion:
-
-  https://mail.openvswitch.org/pipermail/ovs-dev/2016-August/321649.html
-
 * Get incremental updates in ovn-controller and ovn-northd in some
   sensible way.
 
-* Testing improvements, possibly heavily based on ovn-trace.
-
-  Justin Pettit: "I'm planning to write some ovn-trace tests for IPv6.
-  Hopefully we can get those into 2.6."
-
 * Self-managing HA for ovn-northd (avoiding the need to set up
   independent tooling for fail-over).
 
@@ -61,11 +48,6 @@ OVN To-do List
   Russell Bryant: "Today that would require creating 4096 ports for the VM and
   attach to 4096 OVN networks, so doable, but not quite ideal."
 
-* Native DNS support
-
-  Russell Bryant: "This is an OpenStack requirement to fully eliminate the DHCP
-  agent."
-
 * Service function chaining.
 
 * MAC learning.
@@ -119,42 +101,6 @@ OVN To-do List
 
   * MTU handling (fragmentation on output)
 
-* Security
-
-  * Limiting the impact of a compromised chassis.
-
-Every instance of ovn-controller has the same full access to the central
-OVN_Southbound database.  This means that a compromised chassis can
-interfere with the normal operation of the rest of the deployment.  Some
-specific examples include writing to the logical flow table to alter
-traffic handling or updating the port binding table to claim ports that are
-actually present on a different chassis.  In practice, the compromised host
-would be fighting against ovn-northd and other instances of ovn-controller
-that would be trying to restore the correct state.  The impact could
-include at least temporarily redirecting traffic (so the compromised host
-could receive traffic that it shouldn't) and potentially a more general
-denial of service.
-
-There are different potential improvements to this area.  The first would
-be to add some sort of ACL scheme to ovsdb-server.  A proposal for this
-should first include an ACL scheme for ovn-controller.  An example policy
-would be to make Logical_Flow read-only.  Table-level control is needed,
-but is not enough.  For example, ovn-controller must be able to update the
-Chassis and Encap tables, but should only be able to modify the rows
-associated with that chassis and no others.
-
-A more complex example is the Port_Binding table.  Currently,
-ovn-controller is the source of truth of where a port is located.  There
-seems to be  no policy that can prevent malicious behavior of a compromised
-host with this table.
-
-An alternative scheme for port bindings would be to provide an optional
-mode where an external entity controls port bindings and make them
-read-only to ovn-controller.  This is actually how OpenStack works today,
-for example.  The part of OpenStack that manages VMs (Nova) tells the
-networking component (Neutron) where a port will be located, as opposed to
-the networking component discovering it.
-
 * ovsdb-server
 
   ovsdb-server should have adequate features for OVN but it probably needs work
@@ -220,23 +166,8 @@ OVN To-do List
 We need to find a proper solution to solve this issue instead of increasing
 the inactivity_probe value.
 
-* Consider the use of BFD as tunnel monitor.
-
-  The use of BFD for hypervisor-to-hypervisor tunnels is probably not worth it,
-  since there's no alternative to switch to if a tunnel goes down.  It could
-  make sense at a slow rate if someone does OVN monitoring system integration,
-  but not otherwise.
-
-  When OVN gets to supporting HA for gateways (see ovn/OVN-GW-HA.rst), BFD is
-  likely needed as a part of that solution.
-
-  There's more commentary in this ML post:
-  https://mail.openvswitch.org/pipermail/ovs-dev/2015-November/305928.html
-
 * ACL
 
   * Support FTP ALGs.
 
   * Support reject action.
-
-  * Support log option.
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] Update mailing list archive pointers to the current server.

2017-11-27 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 Documentation/internals/contributing/submitting-patches.rst | 2 +-
 Documentation/internals/security.rst| 2 +-
 lib/netdev-linux.c  | 6 +++---
 lib/ovs-thread.c| 2 +-
 ovn/TODO.rst| 4 ++--
 5 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/Documentation/internals/contributing/submitting-patches.rst 
b/Documentation/internals/contributing/submitting-patches.rst
index ac828cd6b879..eb7e651b85a9 100644
--- a/Documentation/internals/contributing/submitting-patches.rst
+++ b/Documentation/internals/contributing/submitting-patches.rst
@@ -252,7 +252,7 @@ Examples of common tags follow.
 
   ::
 
-  Reported-at: http://openvswitch.org/pipermail/dev/2014-June/040952.html
+  Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2014-June/284495.html
 
 ``Submitted-at: ``
 
diff --git a/Documentation/internals/security.rst 
b/Documentation/internals/security.rst
index da7b9b0f8af4..f6a31ad01116 100644
--- a/Documentation/internals/security.rst
+++ b/Documentation/internals/security.rst
@@ -221,7 +221,7 @@ sections for the document include:
   tags, such as Acked-by tags obtained during review.
 
 `CVE-2016-2074
-`__
+`__
 is an example advisory document.
 
 Step 3b: Fix
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f7205e67..e809b8877a83 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -144,7 +144,7 @@ struct tpacket_auxdata {
  *
  * To avoid revisiting problems reported with using configure to detect
  * compatibility (see report at
- * http://openvswitch.org/pipermail/dev/2014-October/047978.html)
+ * https://mail.openvswitch.org/pipermail/ovs-dev/2014-October/291521.html)
  * unconditionally replace ethtool_cmd_speed. */
 #define ethtool_cmd_speed rpl_ethtool_cmd_speed
 static inline uint32_t rpl_ethtool_cmd_speed(const struct ethtool_cmd *ep)
@@ -182,8 +182,8 @@ static inline uint32_t rpl_ethtool_cmd_speed(const struct 
ethtool_cmd *ep)
  *
  * Tests for rtnl_link_stats64 don't seem to consistently work, e.g. on
  * 2.6.32-431.29.2.el6.x86_64 (see report at
- * http://openvswitch.org/pipermail/dev/2014-October/047978.html).  Maybe
- * if_link.h is not self-contained on those kernels.  It is easiest to
+ * https://mail.openvswitch.org/pipermail/ovs-dev/2014-October/291521.html).
+ * Maybe if_link.h is not self-contained on those kernels.  It is easiest to
  * unconditionally define a replacement. */
 #ifndef IFLA_STATS64
 #define IFLA_STATS64 23
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index d0c0c09bde49..f8bc06d382c8 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -402,7 +402,7 @@ ovs_thread_create(const char *name, void *(*start)(void *), 
void *arg)
 
 /* Some small systems use a default stack size as small as 80 kB, but OVS
  * requires approximately 384 kB according to the following analysis:
- * http://openvswitch.org/pipermail/dev/2016-January/065049.html
+ * https://mail.openvswitch.org/pipermail/ovs-dev/2016-January/308592.html
  *
  * We use 512 kB to give us some margin of error. */
 pthread_attr_t attr;
diff --git a/ovn/TODO.rst b/ovn/TODO.rst
index d0e62bfbb919..f8569b357281 100644
--- a/ovn/TODO.rst
+++ b/ovn/TODO.rst
@@ -33,7 +33,7 @@ OVN To-do List
 
   Latest discussion:
 
-  http://openvswitch.org/pipermail/dev/2016-August/078106.html
+  https://mail.openvswitch.org/pipermail/ovs-dev/2016-August/321649.html
 
 * Get incremental updates in ovn-controller and ovn-northd in some
   sensible way.
@@ -231,7 +231,7 @@ OVN To-do List
   likely needed as a part of that solution.
 
   There's more commentary in this ML post:
-  http://openvswitch.org/pipermail/dev/2015-November/062385.html
+  https://mail.openvswitch.org/pipermail/ovs-dev/2015-November/305928.html
 
 * ACL
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] netdev-tc-offloads: Use customary types for buffer.

2017-11-27 Thread Ben Pfaff
This function uses local array set_buff[] to store Netlink attributes.
It declares set_buff as an array of character pointers, which is a strange
type for a buffer of non-character-pointer objects.  In OVS it is
customary to use an ofpbuf with a stub of uint64_t objecs (to ensure
proper alignment, otherwise uint8_t would be more usual).  This commit
changes to that more usual form.

Signed-off-by: Ben Pfaff 
---
 lib/netdev-tc-offloads.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 1f77b5595594..9364d94f05ef 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -581,18 +581,17 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
  bool hasmask)
 {
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 20);
-char *set_buff[128], *set_data, *set_mask;
+uint64_t set_stub[1024 / 8];
+struct ofpbuf set_buf = OFPBUF_STUB_INITIALIZER(set_stub);
+char *set_data, *set_mask;
 char *key = (char *) &flower->rewrite.key;
 char *mask = (char *) &flower->rewrite.mask;
 const struct nlattr *attr;
 int i, j, type;
 size_t size;
 
-ovs_assert(set_len <= 128);
-
 /* copy so we can set attr mask to 0 for used ovs key struct members  */
-memcpy(set_buff, set, set_len);
-attr = (struct nlattr *) set_buff;
+attr = ofpbuf_put(&set_buf, set, set_len);
 
 type = nl_attr_type(attr);
 size = nl_attr_get_size(attr) / 2;
@@ -602,6 +601,7 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
 if (type >= ARRAY_SIZE(set_flower_map)
 || !set_flower_map[type][0].size) {
 VLOG_DBG_RL(&rl, "unsupported set action type: %d", type);
+ofpbuf_uninit(&set_buf);
 return EOPNOTSUPP;
 }
 
@@ -634,9 +634,11 @@ parse_put_flow_set_masked_action(struct tc_flower *flower,
 if (hasmask && !is_all_zeros(set_mask, size)) {
 VLOG_DBG_RL(&rl, "unsupported sub attribute of set action type %d",
 type);
+ofpbuf_uninit(&set_buf);
 return EOPNOTSUPP;
 }
 
+ofpbuf_uninit(&set_buf);
 return 0;
 }
 
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request

2017-11-27 Thread Ben Pfaff
On Tue, Nov 21, 2017 at 01:14:53PM +, Stokes, Ian wrote:
> > On Fri, Nov 17, 2017 at 07:27:51PM +, Stokes, Ian wrote:
> > > The following changes since commit
> > 1ae83bb20677b42d63dbb2140fa8ed3144c6260f:
> > >
> > >   netdev-tc-offloads: Add support for action set (2017-11-16 08:10:29
> > > -0800)
> > >
> > > are available in the git repository at:
> > >
> > >   https://github.com/istokes/ovs dpdk_merge
> > >
> > > for you to fetch changes up to af5b0dad30359d14d6412eb80e81783a83a678ce:
> > >
> > >   netdev-dpdk: Fix mempool creation with large MTU. (2017-11-17
> > > 16:26:33 +)
> > 
> > Thanks a lot.  I merged this branch into master.
> 
> Thanks Ben.
> 
> > 
> > This yields a new "sparse" warning:
> > ../lib/netdev-tc-offloads.c:584:20: warning: Variable length array is
> > used.
> > because of this declaration in parse_put_flow_set_masked_action():
> > char *set_buff[set_len], *set_data, *set_mask;
> > 
> 
> So this was introduced in commit 1ae83bb20677b42d63dbb2140fa8ed3144c6260f: 
> netdev-tc-offloads: Add support for action set. This was pushed just after I 
> had submitted the pull request (but wasn't actually part of the pull request 
> patches).
> 
> It's strange though, I ran sparse before submitting the pull request and it 
> came back clean. Checking after the merge and I don't see the warning you 
> see. (Normally I would not submit a pull request if a sparse warning 
> occurred). What version of sparse are you using out of interest? I'm using 
> sparse-0.5.0-10.
>  
> > It may or may not be worth fixing that.  In favor of fixing it is keeping
> > OVS sparse warning free, but against it is that the replacement would
> > probably be malloc(), which is slower.
> > 
> > However, looking a bit closer (now that I've already done the merge), I
> > think there is a bug here.  set_buff[] is declared as char
> > *set_buff[set_len], which has size (set_len * sizeof(char *)), but only
> > set_len bytes of it are used.  I think that the right declaration would be
> > more like uint8_t set_buff[set_len];, although that would lack the proper
> > alignment for struct nl_attr.
> > 
> > Maybe something like this would be the appropriate fix for both issues.
> > Will you please review it?
> 
> Sure, will be happy to review, but would also be good to get someone with 
> tc-offload experience to sign off also.
> 
> Roi has also submitted a patch to fix the issue, maybe Roi could review below 
> also?

Roi's patch got merged, so that fixes the warning.

I still think there is some merit in my additional changes, so I'll
submit a revised patch.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Restore flows is slow using "ovs-save", is there someone who use RPC to speed up flow restore?

2017-11-27 Thread Ben Pfaff
On Fri, Nov 24, 2017 at 11:07:07AM +0800, Sam wrote:
> I'm working on speed up ovs restart, I found that restore flows is slow
> when there are lots of flows, so I want to use RPC to store flows in
> another process, and then restore from it.

What RPC do you mean?  Why do you think that your process will be faster
than ovs-ofctl?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Euro-Million Award Attached!

2017-11-27 Thread Howard Bruce via dev


 
 

 
 

 
 

 
 

 
 

 
 

 
 

 
 

 
 

 
 

  
 

 
 

 
 

 
 


   

   

   

   

   

   

   

   

   

   

   

   

   

   ___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [CONNTRACK] Discussions at OvS 2017

2017-11-27 Thread Ben Pfaff
Darrell, I think that you are working on some of the specific items that
Aaron listed.  Can you comment on that?

On Sat, Nov 25, 2017 at 07:37:23PM +, Darrell Ball wrote:
> Let me clarify some general points.
> 
> 1/ We will not be porting any code from the Linux kernel, whether it be SIP 
> module code or anything else.
> 
> 2/ Furthermore, we will not be using proprietary code algorithms and other 
> aspects from the Linux kernel.
> 
> 3/ Furthermore, those people working in, having worked in, or having been 
> exposed to Netfilter code need to
> be careful about any “cross-pollination”, intentional or otherwise.
> 
> 4/ In addition, in general, we don’t think it is necessary to copy the 
> behavior of the kernel; we look at
> each behavior and see if it makes sense or is necessary on its own technical 
> merits. If we don’t think it
> necessary, overly complex,  or we have something better, we can omit that 
> behavior entirely or do it differently.
> 
> Thanks Darrell
> 
> 
> 
> On 11/25/17, 9:47 AM, "ovs-dev-boun...@openvswitch.org on behalf of Tiago 
> Lam"  wrote:
> 
> Hi Aaron (and all),
> 
> Thank you for your email, I found it to be informative.
> 
> Would you mind elaborating a bit more on point number 5 below?
> 
> With regards to SIP (don't know about SCTP), a module [1] seems to already
> exist for the kernel, integrating with Netfilter / conntrack.
> 
> So, in terms of support for SIP in OVS' userspace conntrack (or any other 
> new
> protocol for that matter, but I'm most familiar with SIP), would OVS be
> looking for a port, or a "from scratch" implementation? Or what would be 
> the
> preference here?
> 
> Thanks and regards,
> 
> Tiago.
> 
> [1] 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__people.netfilter.org_chentschel_docs_sip-2Dconntrack-2Dnat.html&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=FfTX_mff2SgY4OCYC5YbZOO44fljLYgir7rk9fccjRQ&s=Pfb29ck4yGzJEN3CgHQWatvLFA-XFUuMG_UWAKoutN0&e=
> 
> On 11/20/2017 06:21 PM, Aaron Conole wrote:
> > (NOTE: This is a resend - I fat-fingered the ovs email.  Apologies to
> > those who got duplicates).
> > 
> > This email is meant to summarize some of the discussions we had at OvS
> > conference.
> > 
> > The interest in the userspace conntrack is heating up.  That's a good
> > thing, but it means that we'll probably have some growing pains as it
> > relates to features and usages.  These are the topics and some
> > additional information that we came up with.
> > 
> > 1. Making EST state match between linux kernel conntrack and userspace
> >conntrack.  We will need to make sure that the windows datapath
> >conntrack also matches.  The issue came down to the distinction about
> >whether commit action should also imply that the connection is
> >established.
> > 
> > 2. Disabling conntrack helpers 'on by default' in the userspace
> >datapath.  This represents possible security issue; users will want
> >to disable helpers (or enable helpers) at their own discretion.
> >One proposed resolution to this is simply disabling the helpers, and
> >relying on the 'alg=' specifier in the conntrack action. Complicating
> >this solution is existing users who rely on the existing solution -
> >specifically those users with the tftp helper and pxe booting in an
> >large data center.
> > 
> > 3. Performance analysis deep-dive.  We'd like to get input on the
> >performance of the userspace conntrack path.  There was discussion
> >that the performance was suffering - anything here like additional
> >tests people have, or data that can be shared is good.  The
> >development community is interested in knowing what it means.
> > 
> > 4. Hardware offload.  What will we need to present from that standpoint?
> >Are there counters or other information that software will need to
> >expose?  What does it mean for the userspace datapath to be aware of
> >hardware offload for conntrack?
> > 
> > 5. Additional protocol support and helpers.  We think SCTP, and SIP are
> >important.  Any others?  Anyone think this is useful work to do?
> > 
> > Thanks all for the presentations and discussions.
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=FfTX_mff2SgY4OCYC5YbZOO44fljLYgir7rk9fccjRQ&s=0oFbn_OEVTacCVLYQVealZJQelpkp-hZmBriL5kLV_s&e=
> > 
> ___
> dev mailing list
> d...@openvswitch.org
> 
> https://urldefense.proofpoint.com/v2/url?u=https-3A__m

Re: [ovs-dev] In ovs-2.4 version, why can't stop dpdk-bond port by `ovs-ofctl mod br dpdkbond DOWN`?

2017-11-27 Thread Ben Pfaff
On Mon, Nov 27, 2017 at 06:07:35PM +0800, Sam wrote:
> I'm working in ovs 2.4 version, and I use "dpdkb" type netdev. Then I
> call  `ovs-ofctl mod br dpdkbond DOWN` to down this device, at last, my
> code call "rte_eth_dev_stop(dev->port_id);".
> 
> But counter shows the port is still rx/tx, why?

OVS 2.4 is pretty old and no longer supported.  You should probably try
a newer version.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] odp-util: Fix buffer overread in parsing string form of ODP flows.

2017-11-27 Thread Ben Pfaff
On Sun, Nov 26, 2017 at 09:23:08PM -0800, Joe Stringer wrote:
> On 26 November 2017 at 17:41, Ben Pfaff  wrote:
> > scan_u128() should return 0 on an error but it actually returned an errno
> > value in some cases, so a command like this:
> > ovs-appctl dpctl/add-flow 'ct_label(1/55)' ''
> > could cause a buffer overread.
> >
> > This bug is not as severe as it may sound because the string form of ODP
> > flows is not used over OpenFlow or OVSDB, only through the appctl interface
> > that is normally used just by local system administrators and not exposed
> > over a network.
> >
> > Reported-by: Bhargava Shastry 
> > Signed-off-by: Ben Pfaff 
> > ---
> 
> Acked-by: Joe Stringer 

Thanks, Joe.  I applied this to master and backported as far as
branch-2.5.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 0/2] OVN DB log fixes

2017-11-27 Thread Ben Pfaff
OK, I did these backports.

For branch-2.7 I had to backport all of the following commits to make
these patches apply cleanly:

commit 2abbe32153b7e4719b39f477b35e7cc40231338a
Author: Numan Siddique 
Date:   Wed Nov 8 14:28:49 2017 +0530

ovn-ctl: Add -vfile:info option to OVN_NB/SB_LOG options

In the RHEL environment, when OVN db servers are started using ovn-ctl,
log files are empty. Adding "-vfile:info" option to ovsdb-server is
resolving this issue. Running 'ovs-apptctl -t .. vlog/reopen" results in the
logs appearing in the log files. This issue is seen with 2.7.2.

"-vfile:info" option is passed to ovn-northd and ovn-controller when 
starting.
There is no harm in adding this to OVN db servers.

Signed-off-by: Numan Siddique 
Signed-off-by: Ben Pfaff 

commit 7c8ef11c7571e377975b297a2df5564d481c467b
Author: Numan Siddique 
Date:   Wed Nov 8 14:29:07 2017 +0530

OpenvSwitch logrotate: Use ctl file path as target in ovs-appctl to reset 
logs

Presently, logrotate script, searches for the pid files in 
/var/log/openvswitch
and passes the pid file name (without .pid) as target to ovs-appctl. This 
approach
doesn't work for OVN DB servers since the ctl files are generated as 
"ovnnb_db.ctl"
and "ovnsb_db.ctl". So search for the .ctl files instead and use them as 
target to
ovs-appctl.

Suggested-by: Ben Pfaff 
Signed-off-by: Numan Siddique 
Signed-off-by: Ben Pfaff 
Acked-by: Mark Michelson 

commit 19f46fc05301ae420606fb059e80a931b3ca5ae8
Author: Ben Pfaff 
Date:   Thu Apr 13 10:47:55 2017 -0700

debian, xenserver: Update logrotate config to match RHEL.

Commit 618a5b45ae8b ("rhel: Avoid logrotate error if /var/run/openvswitch
does not exist") updated the RHEL logrotate configuration.  This commit
makes similar changes for Debian, by synchronizing with the RHEL version.

In particular:

- Indent to match logrotate.conf(5) examples.

- Use "sharedscripts" flag, because the postrotate script only needs to
  run once regardless of the number of rotations.

- Drop "delaycompress", because the postrotate script does make daemons
  reopen their log files.

- Ignore errors calling vlog/reopen.

Also make similar changes to the xenserver logrotate script.  I confirmed
via Twitter that the xenserver packaging still has users.

CC: Timothy Redaelli 
Signed-off-by: Ben Pfaff 
Acked-by: Gurucharan Shetty 

commit 866e0852290c7c17ff0b3e47f5ff03c16b7ba427
Author: Timothy Redaelli 
Date:   Thu Apr 13 11:48:20 2017 +0200

rhel: Avoid logrotate error if /var/run/openvswitch does not exist

Avoid also errors if an ovs server didn't start correctly or it crashed 
without
deleting the pid file.

Reported-at: https://bugzilla.redhat.com/show_bug.cgi?id=1441524
Signed-off-by: Timothy Redaelli 
Signed-off-by: Ben Pfaff 

commit 9352d3d4f5766778371affe5874763421ada3114
Author: Timothy Redaelli 
Date:   Thu Apr 13 11:48:19 2017 +0200

rhel/etc_logrotate.d_openvswitch: Fix coding style

Replace tabs by 4 spaces and indent the postrotate script like the
examples in 'man logrotate.conf'

Signed-off-by: Timothy Redaelli 
Signed-off-by: Ben Pfaff 


On Mon, Nov 27, 2017 at 01:12:18PM +0530, Numan Siddique wrote:
> Hi Ben,
> 
> Thanks for the review and applying the patches.
> 
> The issue is seen with 2.7 branch. So It would be great if it is backported
> to 2.8 and 2.7 branches.
> 
> Thanks again
> 
> Numan
> 
> 
> On Mon, Nov 27, 2017 at 6:02 AM, Ben Pfaff  wrote:
> 
> > I applied this series to master.  Let me know if you want backports.
> >
> > On Wed, Nov 08, 2017 at 02:28:13PM +0530, nusid...@redhat.com wrote:
> > > From: Numan Siddique 
> > >
> > > v1 -> v2
> > > ---
> > > In patch 2, changed the approach. Instead of fixing the issue in
> > > ovs-appctl, corrected the ovs logrotate script to use complete unix ctl
> > > file path as suggested by Ben.
> > >
> > >
> > > No changes to patch 1.
> > >
> > > v1
> > > --
> > > In Openstack deployment with OVN HA (with v2.7.2) in RHEL, we see that
> > > OVN DB log files are empty after deployment. Adding "-vfile:info" option
> > > when starting ovsdb-servers fixes this issue. Another issue seen is when
> > > openvswitch logrotate script [1] is called, it doesn't initialize the
> > log files
> > > for the OVN DB servers because of which the log file is empty.
> > >
> > > This patch series fixes these issue.
> > >
> > > It would be good if these fixes are applied to branches 2.8 and 2.7.
> > >
> > > [1] - https://github.com/openvswitch/ovs/blob/master/
> > rhel/etc_logrotate.d_openvswitch
> > >
> > >
> > > Numan Siddique (2):
> > >   ovn-ctl: Add -vfile:info option to OVN_NB/SB_LOG options
> > >   OpenvSwitch logrotate: Use ctl file path as target in ovs-appctl to
> > > reset logs
> > >
> > >  debian/openvswi

Re: [ovs-dev] Failed to trace path into open flow groups

2017-11-27 Thread Ben Pfaff
On Mon, Nov 27, 2017 at 04:24:47PM +0800, Yongsheng Gong wrote:
> Hi,
> 
> Ovs version 2.5.2:
> Os: ubuntu 16.04
> 
> I have flows which in open flow groups, and I want to trace the path
> of one kind of flow to know how it is got to a certain open flow port:

I think that this is more of a bug in tracing write_actions().  It's
fixed in 2.8.0, which reports this sort of thing, e.g.:

blp@sigabrt:~/nicira/ovs/_build(0)$ make sandbox
...
blp@sigabrt:~/nicira/ovs/_build(0)$ ovs-vsctl add-br br0
blp@sigabrt:~/nicira/ovs/tutorial(0)$ ovs-ofctl -OOpenflow11 add-flow br0 
'priority=65535,actions=write_actions(1)'
blp@sigabrt:~/nicira/ovs/tutorial(0)$ ovs-appctl ofproto/trace br0 
in_port=local
Flow: 
in_port=LOCAL,vlan_tci=0x,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x

bridge("br0")
-
 0. priority 65535
write_actions(output:1)
 -> action set is: output:1
--. Executing action set:
output:1
 >> Nonexistent output port

Final flow: 
in_port=LOCAL,vlan_tci=0x,dl_src=00:00:00:00:00:00,dl_dst=00:00:00:00:00:00,dl_type=0x
Megaflow: recirc_id=0,eth,in_port=LOCAL,dl_type=0x
Datapath actions: drop
blp@sigabrt:~/nicira/ovs/tutorial(0)$ 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Shrimp Top 15

2017-11-27 Thread Bonesca - Jona
If you are not able to see this mail, click 
http://r.newsletter.bonescamail.nl/7xa28kndqoatrf.html 
Below you can find our special promotion on Vannamei and Black Tiger Shrimps
 
To get the price for 3 palets it is allowed to make a mix of all the 
varieties    




 
* HOSO = Head On Shell On
* CPTO = Cooked Peeled Tail On
* HLSO = Headless Shell On
 
Kind regards,
 
Bonesca Import en Export BV - Netherlands
  

Bertus Brouwer
Director/Purchase - Dutch / English / German
Tel.: +31 (0) 527 70 10 63
Mail: ber...@bonesca.nl
 
Nedal Muhtaseb
Sales - Dutch / Arab / English / German
Tel: +31 (0) 527 68 07 83
Mail: ne...@bonesca.nl
 
Marianne Kramer
Sales - Dutch / French / English / German
Tel: +31 (0) 527 68 07 82
Mail: maria...@bonesca.nl
 
Henry Bakker
Administration/Planning
Tel: +31 (0) 527 68 07 85
Mail: he...@bonesca.nl; i...@bonesca.nl;invo...@bonesca.nl
 
Thu Hanh
Sales Vietnamese / Thai / Laothian / German / English
Tel: +49 (0) 32221097676
Tel: +31 (0) 527 68 07 88
Mail: t...@bonesca.nl
 
Ramesh Raja
Sales Tamil / Dutch / English
Tel: +31 (0) 527 68 07 84
Mail: r...@bonesca.nl

Policarpo J. Olivas
Sales Spanish, French, Portugese, Italian, English
Tel Spain: +34 (0) 649 566 367
Tel Rep. Dominican: +1 (0) 809 778 4040
Mail: poli...@bonesca.nl
 
Marberta Korf
Office assistant Dutch / English
Tel: +31 (0) 527 68 07 86
Mail:marbe...@bonesca.nl
  Warehouse - Albert Korf / Willem Bakker / Fokke Korf
Tel: +31 (0) 527 68 07 89
Mail: expedi...@bonesca.nl   
This email was sent to d...@openvswitch.org
You received this email because you are registered with Bonesca Import en 
Export BV
 
[ Unsubscribe here ]( http://r.newsletter.bonescamail.nl/7xa28kndqoatrg.html )  

Sent by
[  ]( http://r.newsletter.bonescamail.nl/track/click/vp48yalilaoatrd )     
© 2017 Bonesca Import en Export BV  

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] packets: Prefetch the packet metadata in cacheline1.

2017-11-27 Thread Chandran, Sugesh
Hi Bhanu,

Regards
_Sugesh

> -Original Message-
> From: Bodireddy, Bhanuprakash
> Sent: Monday, November 27, 2017 4:35 PM
> To: 'Aaron Conole' 
> Cc: 'd...@openvswitch.org' ; Ben Pfaff ;
> Chandran, Sugesh 
> Subject: RE: [ovs-dev] [PATCH] packets: Prefetch the packet metadata in
> cacheline1.
> 
> >>Bhanuprakash Bodireddy  writes:
> >>
> >>> pkt_metadata_prefetch_init() is used to prefetch the packet metadata
> >>> before initializing the metadata in pkt_metadata_init(). This is
> >>> done for every packet in userspace datapath and is performance critical.
> >>>
> >>> Commit 99fc16c0 prefetches only cachline0 and cacheline2 as the
> >>> metadata part of respective cachelines will be initialized by
> >>pkt_metadata_init().
> >>>
> >>> However in VXLAN case when popping the vxlan header,
> >>> netdev_vxlan_pop_header() invokes pkt_metadata_init_tnl() which
> >>> zeroes out metadata part of
> >>> cacheline1 that wasn't prefetched earlier and causes performance
> >>> degradation.
> >>>
> >>> By prefetching cacheline1, 9% performance improvement is observed.
> >>
> >>Do we see a degredation in the non-vxlan case?  If not, then I don't
> >>see any reason not to apply this patch.
> >
> >This patch doesn't impact the performance of non-vxlan cases and only
> >have a positive impact in vxlan case.
> 
> The commit message claims that the performance improvement was 9% with
> this patch but when Sugesh was checking he wasn't getting that performance
> improvement on his Haswell.
> 
> I was chatting to Sugesh this afternoon on this patch and we found some
> interesting details and much of this boils down to how the OvS is built .( 
> Apart
> from HW, BIOS settings - TB disabled).
> 
> The test case here measure the VXLAN de capsulation performance alone for
> packet sizes of 118 bytes.
> The OvS CFLAGS and throughput numbers are as below.
> 
> CFLAGS="-O2"
> Master  4.667 Mpps
> With Patch   5.045 Mpps
> 
> CFLAGS="-O2 -msse4.2"
> Master  4.710 Mpps
> With Patch   5.097 Mpps
> 
> CFLAGS="-O2 -march=native"
> Master  5.072 Mpps
> With Patch   5.193 Mpps
> 
> CFLAGS="-Ofast -march=native"
> Master  5.349 Mpps
> With Patch   5.378 Mpps
> 
> This means the performance measurements/claims are difficult to assess and as
> one can see above with "-Ofast, -march=native"
> the improvement is insignificant but this is very platform dependent due to
> "march=native" flag. Also the optimization flags seems to make significant
> difference.
[Sugesh] I also tested on my board with same set of configuration and getting 
the same result as yours.
So this patch offers performance improvement based on the compiler option. I am 
not sure whats the most preferred/used 
compiler option out there.
I always build OVS with CFLAGS="-Ofast -march=native" and the patch doesn't 
have a great improvement in it.

I don't mind Acking the patch, if you could re-send the patch with these 
results and options in the commit message. 
Atleast it will offer performance improvement for other build options.

> 
> - Bhanuprakash.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-dpdk: Remove uneeded call to rte_eth_dev_count().

2017-11-27 Thread Kevin Traynor
On 11/27/2017 04:19 PM, Loftus, Ciara wrote:
>>
>> The call to rte_eth_dev_count() was added as workaround
>> for rte_eth_dev_get_port_by_name() not handling cases
>> when there was no DPDK ports. In recent versions of DPDK,
>> rte_eth_dev_get_port_by_name() does handle this
>> case, so the rte_eth_dev_count() call can be removed.
>>
>> CC: Ciara Loftus 
>> Signed-off-by: Kevin Traynor 
>> ---
>>  lib/netdev-dpdk.c | 6 ++
>>  1 file changed, 2 insertions(+), 4 deletions(-)
>>
>> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
>> index faff842..0436ff0 100644
>> --- a/lib/netdev-dpdk.c
>> +++ b/lib/netdev-dpdk.c
>> @@ -1192,6 +1192,5 @@ netdev_dpdk_process_devargs(struct
>> netdev_dpdk *dev,
>>  dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
>>
>> -if (!rte_eth_dev_count()
>> -|| rte_eth_dev_get_port_by_name(name, &new_port_id)
>> +if (rte_eth_dev_get_port_by_name(name, &new_port_id)
>>  || !rte_eth_dev_is_valid_port(new_port_id)) {
>>  /* Device not found in DPDK, attempt to attach it */
>> @@ -2526,6 +2525,5 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
>> int argc OVS_UNUSED,
>>  ovs_mutex_lock(&dpdk_mutex);
>>
>> -if (!rte_eth_dev_count() || rte_eth_dev_get_port_by_name(argv[1],
>> - &port_id)) {
>> +if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {
> 
> LGTM. DPDK commit f9ae888b1e19 removes the need for the call to count().
> 
> Acked-by: Ciara Loftus 
> 

Thanks - I updated the commit message with that info and the ack in v2.

Kevin.

> Thanks,
> Ciara
> 
>>  response = xasprintf("Device '%s' not found in DPDK", argv[1]);
>>  goto error;
>> --
>> 1.8.3.1
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] tc: Fix build breakage on GCC 7 by annotating fall-through.

2017-11-27 Thread Ben Pfaff
Thanks, I applied this to master.

On Mon, Nov 27, 2017 at 03:36:55PM +0200, Paul Blakey wrote:
> Acked-by: Paul Blakey 
> 
> 
> On 27/11/2017 02:26, Ben Pfaff wrote:
> >Open vSwitch enables the GCC 7+ option that warns about fall-through
> >switch statements.  This commit fixes newly introduced warnings.
> >
> >CC: Paul Blakey 
> >Fixes: d6118e628988 ("netdev-tc-offloads: Verify csum flags on dump from tc")
> >Signed-off-by: Ben Pfaff 
> >---
> >  lib/tc.c | 2 ++
> >  1 file changed, 2 insertions(+)
> >
> >diff --git a/lib/tc.c b/lib/tc.c
> >index d3a031237ab1..bfe20ce1a3bd 100644
> >--- a/lib/tc.c
> >+++ b/lib/tc.c
> >@@ -1250,6 +1250,7 @@ csum_update_flag(struct tc_flower *flower,
> >  switch (htype) {
> >  case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
> >  flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
> >+/* Fall through. */
> >  case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
> >  case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
> >  case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
> >@@ -1269,6 +1270,7 @@ csum_update_flag(struct tc_flower *flower,
> >   flower->key.ip_proto);
> >  break;
> >  }
> >+/* Fall through. */
> >  case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
> >  return 0; /* success */
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] netdev-dpdk: Remove uneeded call to rte_eth_dev_count().

2017-11-27 Thread Kevin Traynor
The call to rte_eth_dev_count() was added as workaround
for rte_eth_dev_get_port_by_name() not handling cases
when there was no DPDK ports.

In versions of DPDK >= 17.02 rte_eth_dev_get_port_by_name()
does handle this case (DPDK commit f9ae888b1e19).
rte_eth_dev_count() is no longer needed so remove it.

Acked-by: Ciara Loftus 
Signed-off-by: Kevin Traynor 
---
 lib/netdev-dpdk.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index faff842..0436ff0 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1192,6 +1192,5 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
 dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
 
-if (!rte_eth_dev_count()
-|| rte_eth_dev_get_port_by_name(name, &new_port_id)
+if (rte_eth_dev_get_port_by_name(name, &new_port_id)
 || !rte_eth_dev_is_valid_port(new_port_id)) {
 /* Device not found in DPDK, attempt to attach it */
@@ -2526,6 +2525,5 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 ovs_mutex_lock(&dpdk_mutex);
 
-if (!rte_eth_dev_count() || rte_eth_dev_get_port_by_name(argv[1],
- &port_id)) {
+if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {
 response = xasprintf("Device '%s' not found in DPDK", argv[1]);
 goto error;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] odp-util: Avoid reading wrong table in generate_all_wildcard_mask().

2017-11-27 Thread Ben Pfaff
These lines of code are intended to copy the 'next' and 'next_max' members
of tbl[type] into local variables 'tbl' and 'max':
tbl = tbl[type].next;
max = tbl[type].next_max;
They didn't do it properly because the first line changes 'tbl', so that
the first and seconds lines' references to tbl[type] refer to different
objects.

This commit fixes the problem.

Found by libfuzzer.

Reported-by: Bhargava Shastry 
Signed-off-by: Ben Pfaff 
---
 lib/odp-util.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/odp-util.c b/lib/odp-util.c
index 45a890c46aa0..b7b6a2a9a785 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -3497,8 +3497,9 @@ generate_all_wildcard_mask(const struct attr_len_tbl 
tbl[], int max,
 size_t nested_mask;
 
 if (tbl[type].next) {
-tbl = tbl[type].next;
-max = tbl[type].next_max;
+const struct attr_len_tbl *entry = &tbl[type];
+tbl = entry->next;
+max = entry->next_max;
 }
 
 nested_mask = nl_msg_start_nested(ofp, type);
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor dp_netdev_pmd_thread structure."

2017-11-27 Thread Bodireddy, Bhanuprakash
>I agree with Ilya here. Adding theses cache line markers and re-grouping
>variables to minimize gaps in cache lines is creating a maintenance burden
>without any tangible benefit. I have had to go through the pain of refactoring
>my PMD Performance Metrics patch to the new dp_netdev_pmd_thread
>struct and spent a lot of time to analyze the actual memory layout with GDB
>and play Tetris with the variables.

Analyzing the memory layout with gdb for large structures is time consuming and 
not usually recommended.
I would suggest using Poke-a-hole(pahole) and that helps to understand and fix 
the structures in no time.
With pahole it's going to be lot easier to work with large structures 
especially.

>
>There will never be more than a handful of PMDs, so minimizing the gaps does
>not matter from memory perspective. And whether the individual members
>occupy 4 or 5 cache lines does not matter either compared to the many
>hundred cache lines touched for EMC and DPCLS lookups of an Rx batch. And
>any optimization done for x86 is not necessarily optimal for other
>architectures.

I agree that optimization targeted for x86 doesn't necessarily suit ARM due to 
its different cache line size.

>
>Finally, even for x86 there is not even a performance improvement. I re-ran
>our standard L3VPN over VXLAN performance PVP test on master and with
>Ilya's revert patch:
>
>Flows   master  reverted
>8,  4.464.48
>100,4.274.29
>1000,   4.074.07
>2000,   3.683.68
>5000,   3.033.03
>1,  2.762.77
>2,  2.642.65
>5,  2.602.61
>10, 2.602.61
>50, 2.602.61

What are the  CFLAGS in this case, as they seem to make difference. I have 
added my finding here for a different patch targeted at performance
  https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341270.html

Patches to consider when testing your use case:
 Xzalloc_cachline:  
https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341231.html
 (If using output batching)  
https://mail.openvswitch.org/pipermail/ovs-dev/2017-November/341230.html

- Bhanuprakash.

>
>All in all, I support reverting this change.
>
>Regards, Jan
>
>Acked-by: Jan Scheurich 
>
>> -Original Message-
>> From: ovs-dev-boun...@openvswitch.org
>> [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of Bodireddy,
>> Bhanuprakash
>> Sent: Friday, 24 November, 2017 17:09
>> To: Ilya Maximets ; ovs-dev@openvswitch.org;
>> Ben Pfaff 
>> Cc: Heetae Ahn 
>> Subject: Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor
>dp_netdev_pmd_thread structure."
>>
>> >On 22.11.2017 20:14, Bodireddy, Bhanuprakash wrote:
>> >>> This reverts commit a807c15796ddc43ba1ffb2a6b0bd2ad4e2b73941.
>> >>>
>> >>> Padding and aligning of dp_netdev_pmd_thread structure members is
>> >>> useless, broken in a several ways and only greatly degrades
>> >>> maintainability and extensibility of the structure.
>> >>
>> >> The idea of my earlier patch was to mark the cache lines and reduce
>> >> the
>> >holes while still maintaining the grouping of related members in this
>structure.
>> >
>> >Some of the grouping aspects looks strange. For example, it looks
>> >illogical that 'exit_latch' is grouped with 'flow_table' but not the
>> >'reload_seq' and other reload related stuff. It looks strange that
>> >statistics and counters spread across different groups. So, IMHO, it's not
>well grouped.
>>
>> I had to strike a fine balance and some members may be placed in a
>> different group due to their sizes and importance. Let me think if I can make
>it better.
>>
>> >
>> >> Also cache line marking is a good practice to make some one extra
>> >> cautious
>> >when extending or editing important structures .
>> >> Most importantly I was experimenting with prefetching on this
>> >> structure
>> >and needed cache line markers for it.
>> >>
>> >> I see that you are on ARM (I don't have HW to test) and want to
>> >> know if this
>> >commit has any negative affect and any numbers would be appreciated.
>> >
>> >Basic VM-VM testing shows stable 0.5% perfromance improvement with
>> >revert applied.
>>
>> I did P2P, PVP and PVVP with IXIA and haven't noticed any drop on X86.
>>
>> >Padding adds 560 additional bytes of holes.
>> As the cache line in ARM is 128 , it created holes, I can find a workaround 
>> to
>handle this.
>>
>> >
>> >> More comments inline.
>> >>
>> >>>
>> >>> Issues:
>> >>>
>> >>>1. It's not working because all the instances of struct
>> >>>   dp_netdev_pmd_thread allocated only by usual malloc. All the
>> >>>   memory is not aligned to cachelines -> structure almost never
>> >>>   starts at aligned memory address. This means that any further
>> >>>   paddings and alignments inside the structure are completely
>> >>>   useless. Fo example:
>> >>>
>> >>>   Breakpoint 1, pmd_thread_main
>> >>>   (gdb) p pmd
>> >>>   $49 = (struct dp_netdev_pmd_thread *) 0x1b1af20
>> >>>   (gdb) p &pmd->cacheline1
>> >>>

Re: [ovs-dev] [dpdk-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address when received first packet in vhost type

2017-11-27 Thread Aaron Conole
"Tan, Jianfeng"  writes:

> On 11/27/2017 10:27 PM, Yuanhan Liu wrote:
>> On Fri, Nov 24, 2017 at 05:59:09PM +0800, Chen Hailin wrote:
>>> Hi Aaron Conole && Jianfeng,
>>>
>>> The stp could not work in ovs-dpdk vhostuser.
>>> Because the attached vhost device doesn't have MAC address.
>>>
>>> Now we have two ways to solve this problem.
>>> 1. The vhost learns MAC address from packet like as my first patch.
>> I do agree with Aaron this is not the right way.
>
> I do think it should be the vswitch's responsibility to learn mac of
> vhost port.
>
> Except that it's the only feasible way without modifying the spec
> (yuanhan already makes it very clear below), we can treat the vswitch
> as a phsical switch, VM as a physical server, virtio/vhost port as a
> back-to-back connected NICs, the only way of the physical switch to
> know the mac of the NIC on the other side is ARP learning.
>
> Might I ask why you don't think it's a right way?

As a quick example, I think a malicious guest in a multi-tenant
environment could send traffic out to manipulate this feature into
learning an incorrect mac address.

To get this right requires doing deep packet inspection, and making sure
to only learn based on certain l2 traffic.

> Thanks,
> Jianfeng
>
>>
>>> 2. The virtio notifies MAC address actively to vhost user .
>> Unfortunately, AFAIK, there is no way to achieve that so far. we could
>> either let virtio/QEMU to expose the CQ to vhost or add a new VHOST_USER
>> message to carry the mac address. While vhost-user is a generic interface
>> adding a virtio-net specific message also doesn't seem quite right.
>> Exposing CQ is probably the best we can do.
>>
>> Anyway, both need spec change.
>>
>>  --yliu
>>> In my opinions,  if we treat it as a device,  we should allocate
>>> MAC address for the device when the VM started.
>>>
>>> Which one do you think better?
>>>
>>>
>>>
>>> Best Regards,
>>> Chen Hailin
>>> che...@arraynetworks.com.cn
>>>   From: Aaron Conole
>>> Date: 2017-11-18 10:00
>>> To: Hailin Chen
>>> CC: ovs-dev@openvswitch.org; Maxime Coquelin; cl...@arraynetworks.com.cn
>>> Subject: Re: [ovs-dev] [PATCH RFC] netdev-dpdk: Fix device obtain
>>> mac address when received first packet in vhost type
>>> Hi Hailin,
>>>   Hailin Chen  writes:
>>>   
 The stp could not work on netdev-dpdk if network is loop.
 Because the stp protocol negotiates designate port by sending
 BPDU packets which contains MAC address.
 However the device doesn't have MAC address in vhostuser type.
 Thus, function send_bpdu_cb would not send BPDU packets.

 This patch will set the MAC for device when received first packet.

 Signed-off-by: Hailin Chen 
 ---
>>>   Thanks for the patch.
>>>   In general, I don't think this is the right approach to deal with
>>> this
>>> type of issue.  I believe the problem statement is that OvS bridge is
>>> unaware of the guest MAC address - did I get it right?  In that case, I
>>> would think that a better way to solve this would be to have virtio tell
>>> the mac address of the guest.  I don't recall right now if that's
>>> allowed in the virtio spec, but I do remember some kind of negotiation
>>> features.
>>>   I've CC'd Maxime, who is one of the maintainers of the virtio
>>> code from
>>> DPDK side.  Perhaps there is an alternate way to solve this.
>>> ___
>>> dev mailing list
>>> d...@openvswitch.org
>>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [dpdk-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address when received first packet in vhost type

2017-11-27 Thread Tan, Jianfeng



On 11/28/2017 12:14 AM, Aaron Conole wrote:

Yuanhan Liu  writes:


On Fri, Nov 24, 2017 at 05:59:09PM +0800, Chen Hailin wrote:

Hi Aaron Conole && Jianfeng,

The stp could not work in ovs-dpdk vhostuser.
Because the attached vhost device doesn't have MAC address.

Now we have two ways to solve this problem.
1. The vhost learns MAC address from packet like as my first patch.

I do agree with Aaron this is not the right way.


2. The virtio notifies MAC address actively to vhost user .

Unfortunately, AFAIK, there is no way to achieve that so far. we could
either let virtio/QEMU to expose the CQ to vhost or add a new VHOST_USER
message to carry the mac address. While vhost-user is a generic interface
adding a virtio-net specific message also doesn't seem quite right.
Exposing CQ is probably the best we can do.

Anyway, both need spec change.

There are other possible ways.  libvirt knows about the mac address from
the domain xml file.  Perhaps it's possible to set the mac column in the
database to the correct value when the port is being created in ovs?
This would be an action taken by the orchestration tool.


In OVS db, we can only see vhost port, but not virtio port. That is to 
say, we could use different mac for vhost port from virtio port, 
especially when it works as a vrouter instead of vswitch.




Additionally, there's a mechanism in virtio-net to set the mac address
from the host to the guest.  Is it possible to expose that functionality
through vhost-user?


We can assign mac addr when starting QEMU. After that, I suppose we 
cannot set mac addr any more, let alone setting it from vhost-user side 
(vhost-user protocol does not support it yet).


Thanks,
Jianfeng



Then when the orchestration tool sets the mac, it can be propagated, and
mac_in_use can reflect the appropriate value.  I think that's a workable
solution, but I might have missed something.


--yliu

In my opinions,  if we treat it as a device,  we should allocate
MAC address for the device when the VM started.

Which one do you think better?



Best Regards,
Chen Hailin
che...@arraynetworks.com.cn
  
From: Aaron Conole

Date: 2017-11-18 10:00
To: Hailin Chen
CC: ovs-dev@openvswitch.org; Maxime Coquelin; cl...@arraynetworks.com.cn
Subject: Re: [ovs-dev] [PATCH RFC] netdev-dpdk: Fix device obtain
mac address when received first packet in vhost type
Hi Hailin,
  
Hailin Chen  writes:
  

The stp could not work on netdev-dpdk if network is loop.
Because the stp protocol negotiates designate port by sending
BPDU packets which contains MAC address.
However the device doesn't have MAC address in vhostuser type.
Thus, function send_bpdu_cb would not send BPDU packets.

This patch will set the MAC for device when received first packet.

Signed-off-by: Hailin Chen 
---
  
Thanks for the patch.
  
In general, I don't think this is the right approach to deal with this

type of issue.  I believe the problem statement is that OvS bridge is
unaware of the guest MAC address - did I get it right?  In that case, I
would think that a better way to solve this would be to have virtio tell
the mac address of the guest.  I don't recall right now if that's
allowed in the virtio spec, but I do remember some kind of negotiation
features.
  
I've CC'd Maxime, who is one of the maintainers of the virtio code from

DPDK side.  Perhaps there is an alternate way to solve this.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] packets: Prefetch the packet metadata in cacheline1.

2017-11-27 Thread Bodireddy, Bhanuprakash
>>Bhanuprakash Bodireddy  writes:
>>
>>> pkt_metadata_prefetch_init() is used to prefetch the packet metadata
>>> before initializing the metadata in pkt_metadata_init(). This is done
>>> for every packet in userspace datapath and is performance critical.
>>>
>>> Commit 99fc16c0 prefetches only cachline0 and cacheline2 as the
>>> metadata part of respective cachelines will be initialized by
>>pkt_metadata_init().
>>>
>>> However in VXLAN case when popping the vxlan header,
>>> netdev_vxlan_pop_header() invokes pkt_metadata_init_tnl() which
>>> zeroes out metadata part of
>>> cacheline1 that wasn't prefetched earlier and causes performance
>>> degradation.
>>>
>>> By prefetching cacheline1, 9% performance improvement is observed.
>>
>>Do we see a degredation in the non-vxlan case?  If not, then I don't
>>see any reason not to apply this patch.
>
>This patch doesn't impact the performance of non-vxlan cases and only have a
>positive impact in vxlan case.

The commit message claims that the performance improvement was 9% with this 
patch
but when Sugesh was checking he wasn't getting that performance improvement on 
his Haswell.

I was chatting to Sugesh this afternoon on this patch and we found some 
interesting details and much
of this boils down to how the OvS is built .( Apart from HW, BIOS settings - TB 
disabled).

The test case here measure the VXLAN de capsulation performance alone for 
packet sizes of 118 bytes.
The OvS CFLAGS and throughput numbers are as below.

CFLAGS="-O2"
Master  4.667 Mpps  
With Patch   5.045 Mpps

CFLAGS="-O2 -msse4.2"
Master  4.710 Mpps
With Patch   5.097 Mpps

CFLAGS="-O2 -march=native"
Master  5.072 Mpps
With Patch   5.193 Mpps

CFLAGS="-Ofast -march=native"
Master  5.349 Mpps
With Patch   5.378 Mpps

This means the performance measurements/claims are difficult to assess and as 
one can see above with "-Ofast, -march=native"
the improvement is insignificant but this is very platform dependent due to 
"march=native" flag. Also the optimization flags seems to
make significant difference.

- Bhanuprakash.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev-dpdk: Remove uneeded call to rte_eth_dev_count().

2017-11-27 Thread Loftus, Ciara
> 
> The call to rte_eth_dev_count() was added as workaround
> for rte_eth_dev_get_port_by_name() not handling cases
> when there was no DPDK ports. In recent versions of DPDK,
> rte_eth_dev_get_port_by_name() does handle this
> case, so the rte_eth_dev_count() call can be removed.
> 
> CC: Ciara Loftus 
> Signed-off-by: Kevin Traynor 
> ---
>  lib/netdev-dpdk.c | 6 ++
>  1 file changed, 2 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index faff842..0436ff0 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -1192,6 +1192,5 @@ netdev_dpdk_process_devargs(struct
> netdev_dpdk *dev,
>  dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
> 
> -if (!rte_eth_dev_count()
> -|| rte_eth_dev_get_port_by_name(name, &new_port_id)
> +if (rte_eth_dev_get_port_by_name(name, &new_port_id)
>  || !rte_eth_dev_is_valid_port(new_port_id)) {
>  /* Device not found in DPDK, attempt to attach it */
> @@ -2526,6 +2525,5 @@ netdev_dpdk_detach(struct unixctl_conn *conn,
> int argc OVS_UNUSED,
>  ovs_mutex_lock(&dpdk_mutex);
> 
> -if (!rte_eth_dev_count() || rte_eth_dev_get_port_by_name(argv[1],
> - &port_id)) {
> +if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {

LGTM. DPDK commit f9ae888b1e19 removes the need for the call to count().

Acked-by: Ciara Loftus 

Thanks,
Ciara

>  response = xasprintf("Device '%s' not found in DPDK", argv[1]);
>  goto error;
> --
> 1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [dpdk-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address when received first packet in vhost type

2017-11-27 Thread Aaron Conole
Yuanhan Liu  writes:

> On Fri, Nov 24, 2017 at 05:59:09PM +0800, Chen Hailin wrote:
>> Hi Aaron Conole && Jianfeng,
>> 
>> The stp could not work in ovs-dpdk vhostuser.
>> Because the attached vhost device doesn't have MAC address.
>> 
>> Now we have two ways to solve this problem.
>> 1. The vhost learns MAC address from packet like as my first patch.
>
> I do agree with Aaron this is not the right way.
>
>> 2. The virtio notifies MAC address actively to vhost user .
>
> Unfortunately, AFAIK, there is no way to achieve that so far. we could
> either let virtio/QEMU to expose the CQ to vhost or add a new VHOST_USER
> message to carry the mac address. While vhost-user is a generic interface
> adding a virtio-net specific message also doesn't seem quite right.
> Exposing CQ is probably the best we can do.
>
> Anyway, both need spec change.

There are other possible ways.  libvirt knows about the mac address from
the domain xml file.  Perhaps it's possible to set the mac column in the
database to the correct value when the port is being created in ovs?
This would be an action taken by the orchestration tool.

Additionally, there's a mechanism in virtio-net to set the mac address
from the host to the guest.  Is it possible to expose that functionality
through vhost-user?

Then when the orchestration tool sets the mac, it can be propagated, and
mac_in_use can reflect the appropriate value.  I think that's a workable
solution, but I might have missed something.

>   --yliu
>> 
>> In my opinions,  if we treat it as a device,  we should allocate 
>> MAC address for the device when the VM started.
>> 
>> Which one do you think better?
>> 
>> 
>> 
>> Best Regards,
>> Chen Hailin
>> che...@arraynetworks.com.cn
>>  
>> From: Aaron Conole
>> Date: 2017-11-18 10:00
>> To: Hailin Chen
>> CC: ovs-dev@openvswitch.org; Maxime Coquelin; cl...@arraynetworks.com.cn
>> Subject: Re: [ovs-dev] [PATCH RFC] netdev-dpdk: Fix device obtain
>> mac address when received first packet in vhost type
>> Hi Hailin,
>>  
>> Hailin Chen  writes:
>>  
>> > The stp could not work on netdev-dpdk if network is loop.
>> > Because the stp protocol negotiates designate port by sending
>> > BPDU packets which contains MAC address.
>> > However the device doesn't have MAC address in vhostuser type.
>> > Thus, function send_bpdu_cb would not send BPDU packets.
>> >
>> > This patch will set the MAC for device when received first packet.
>> >
>> > Signed-off-by: Hailin Chen 
>> > ---
>>  
>> Thanks for the patch.
>>  
>> In general, I don't think this is the right approach to deal with this
>> type of issue.  I believe the problem statement is that OvS bridge is
>> unaware of the guest MAC address - did I get it right?  In that case, I
>> would think that a better way to solve this would be to have virtio tell
>> the mac address of the guest.  I don't recall right now if that's
>> allowed in the virtio spec, but I do remember some kind of negotiation
>> features.
>>  
>> I've CC'd Maxime, who is one of the maintainers of the virtio code from
>> DPDK side.  Perhaps there is an alternate way to solve this.
>> ___
>> dev mailing list
>> d...@openvswitch.org
>> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [dpdk-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address when received first packet in vhost type

2017-11-27 Thread Tan, Jianfeng



On 11/27/2017 10:27 PM, Yuanhan Liu wrote:

On Fri, Nov 24, 2017 at 05:59:09PM +0800, Chen Hailin wrote:

Hi Aaron Conole && Jianfeng,

The stp could not work in ovs-dpdk vhostuser.
Because the attached vhost device doesn't have MAC address.

Now we have two ways to solve this problem.
1. The vhost learns MAC address from packet like as my first patch.

I do agree with Aaron this is not the right way.


I do think it should be the vswitch's responsibility to learn mac of 
vhost port.


Except that it's the only feasible way without modifying the spec 
(yuanhan already makes it very clear below), we can treat the vswitch as 
a phsical switch, VM as a physical server, virtio/vhost port as a 
back-to-back connected NICs, the only way of the physical switch to know 
the mac of the NIC on the other side is ARP learning.


Might I ask why you don't think it's a right way?

Thanks,
Jianfeng




2. The virtio notifies MAC address actively to vhost user .

Unfortunately, AFAIK, there is no way to achieve that so far. we could
either let virtio/QEMU to expose the CQ to vhost or add a new VHOST_USER
message to carry the mac address. While vhost-user is a generic interface
adding a virtio-net specific message also doesn't seem quite right.
Exposing CQ is probably the best we can do.

Anyway, both need spec change.

--yliu

In my opinions,  if we treat it as a device,  we should allocate
MAC address for the device when the VM started.

Which one do you think better?



Best Regards,
Chen Hailin
che...@arraynetworks.com.cn
  
From: Aaron Conole

Date: 2017-11-18 10:00
To: Hailin Chen
CC: ovs-dev@openvswitch.org; Maxime Coquelin; cl...@arraynetworks.com.cn
Subject: Re: [ovs-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address 
when received first packet in vhost type
Hi Hailin,
  
Hailin Chen  writes:
  

The stp could not work on netdev-dpdk if network is loop.
Because the stp protocol negotiates designate port by sending
BPDU packets which contains MAC address.
However the device doesn't have MAC address in vhostuser type.
Thus, function send_bpdu_cb would not send BPDU packets.

This patch will set the MAC for device when received first packet.

Signed-off-by: Hailin Chen 
---
  
Thanks for the patch.
  
In general, I don't think this is the right approach to deal with this

type of issue.  I believe the problem statement is that OvS bridge is
unaware of the guest MAC address - did I get it right?  In that case, I
would think that a better way to solve this would be to have virtio tell
the mac address of the guest.  I don't recall right now if that's
allowed in the virtio spec, but I do remember some kind of negotiation
features.
  
I've CC'd Maxime, who is one of the maintainers of the virtio code from

DPDK side.  Perhaps there is an alternate way to solve this.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] netdev-dpdk: Remove uneeded call to rte_eth_dev_count().

2017-11-27 Thread Kevin Traynor
The call to rte_eth_dev_count() was added as workaround
for rte_eth_dev_get_port_by_name() not handling cases
when there was no DPDK ports. In recent versions of DPDK,
rte_eth_dev_get_port_by_name() does handle this
case, so the rte_eth_dev_count() call can be removed.

CC: Ciara Loftus 
Signed-off-by: Kevin Traynor 
---
 lib/netdev-dpdk.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index faff842..0436ff0 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1192,6 +1192,5 @@ netdev_dpdk_process_devargs(struct netdev_dpdk *dev,
 dpdk_port_t new_port_id = DPDK_ETH_PORT_ID_INVALID;
 
-if (!rte_eth_dev_count()
-|| rte_eth_dev_get_port_by_name(name, &new_port_id)
+if (rte_eth_dev_get_port_by_name(name, &new_port_id)
 || !rte_eth_dev_is_valid_port(new_port_id)) {
 /* Device not found in DPDK, attempt to attach it */
@@ -2526,6 +2525,5 @@ netdev_dpdk_detach(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 ovs_mutex_lock(&dpdk_mutex);
 
-if (!rte_eth_dev_count() || rte_eth_dev_get_port_by_name(argv[1],
- &port_id)) {
+if (rte_eth_dev_get_port_by_name(argv[1], &port_id)) {
 response = xasprintf("Device '%s' not found in DPDK", argv[1]);
 goto error;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor dp_netdev_pmd_thread structure."

2017-11-27 Thread Bodireddy, Bhanuprakash
[ snip]
>>> Yes, you will always get aligned addressess on your x86 Linux system
>>> that supports
>>> posix_memalign() call. The comment says what it says because it will
>>> make some memory allocation tricks in case posix_memalign() is not
>>> available (Windows, some MacOS, maybe some Linux systems (not sure))
>>> and the address will not be aligned it this case.
>>
>> I also verified the other case when posix_memalign isn't available and
>> even in that case it returns the address aligned on CACHE_LINE_SIZE
>> boundary. I will send out a patch to use  xzalloc_cacheline for allocating 
>> the
>memory.
>
>I don't know how you tested this, because it is impossible:
>
>   1. OVS allocates some memory:
>   base = xmalloc(...);
>
>   2. Rounds it up to the cache line start:
>   payload = (void **) ROUND_UP((uintptr_t) base,
>CACHE_LINE_SIZE);
>
>   3. Returns the pointer increased by 8 bytes:
>   return (char *) payload + MEM_ALIGN;
>
>So, unless you redefined MEM_ALIGN to zero, you will never get aligned
>memory address while allocating by xmalloc_cacheline() on system without
>posix_memalign().
>

Hmmm, I didn't set MEM_ALIGN to zero instead used below test code to get 
aligned addresses
when posix_memalign() isn't available.  We can't set MEM_ALIGN to zero so have 
to do this
hack to get aligned address and store the initial address (original address 
allocated by malloc) in a place before the
aligned location so that it can be freed  by later  call to free(). (I should 
have mentioned in my previous mail). 

-
void **payload;
void *base;

base = xmalloc(CACHE_LINE_SIZE + size + MEM_ALIGN);
/* Address aligned on CACHE_LINE_SIZE boundary. */
payload = (void**)(((uintptr_t) base + CACHE_LINE_SIZE + MEM_ALIGN) &
~(CACHE_LINE_SIZE - 1));
/* Store the original address so it can be freed later. */
payload[-1] = base;
return (char *)payload;
-

- Bhanuprakash.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-ctl: Don't remember vport-* kernel modules

2017-11-27 Thread Eric Garver
On Fri, Nov 10, 2017 at 02:45:05PM -0800, Gurucharan Shetty wrote:
> From OVS 2.8, ovs-vswitchd, when it starts, will
> load the kernel modules for tunnels. It has logic
> inside it to choose either upstream kernel module
> or vport-* kernel module.
> 
> So, when we run 'force-reload-kmod' to upgrade to
> OVS 2.8 from a previous version,  we do not need to
> remember the vport-* kernel module that was previously
> loaded.  It is not really harmful to load vport-* kernel
> module though.
> 
> On RHEL7.x and OVS 2.8, we use the upstream "geneve" kernel
> module for tunnels.
> 
> But, on RHEL 7.x we have hit a bug caused by iptables
> startup script which tries to remove all kernel modules
> related to linux conntrack. It fails to unload openvswitch
> kernel module because it has a reference count on it. But it
> succeeds in unloading vport-geneve and in turn the upstream
> "geneve" kernel module.  This causes the tunnels to go down.
> 
> With this patch, we avoid the above situation, by not loading
> vport-geneve kernel module.  ovs-vswitchd when it starts will
> load upstream geneve. And when "iptables stop" runs, since
> "geneve" has nothing to do with conntrack, it spares it.
> Ideally, we should fix this by incrementing the refcount
> on the kernel modules.
> 
> Signed-off-by: Gurucharan Shetty 

Hi Guru,
Sorry for the delayed response. I missed this before I went on holiday.

> ---
>  utilities/ovs-ctl.in | 11 ---
>  1 file changed, 11 deletions(-)
> 
> diff --git a/utilities/ovs-ctl.in b/utilities/ovs-ctl.in
> index 2e1209a..f1b01d1 100755
> --- a/utilities/ovs-ctl.in
> +++ b/utilities/ovs-ctl.in
> @@ -30,17 +30,9 @@ done
>  ## start ##
>  ## - ##
>  
> -# Keep track of removed vports so we can reload them if needed
> -removed_vports=""
> -
>  insert_mods () {
>  # Try loading openvswitch again.
>  action "Inserting openvswitch module" modprobe openvswitch
> -
> -for vport in $removed_vports; do
> -# Don't treat failures to load vports as fatal error
> -action "Inserting $vport module" modprobe $vport || true
> -done
>  }

Does this break things if we can't use the in kernel tunnels? Have you
tried this on an < 4.3 kernel? In that case we should have to use the
compat/vport interface.

>  
>  insert_mod_if_required () {
> @@ -398,9 +390,6 @@ force_reload_kmod () {
>  
>  for vport in `awk '/^vport_/ { print $1 }' /proc/modules`; do
>  action "Removing $vport module" rmmod $vport
> -if ! grep -q $vport /proc/modules; then
> -removed_vports="$removed_vports $vport"
> -fi
>  done
>  
>  if test -e /sys/module/openvswitch; then
> -- 
> 2.7.4
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] net: openvswitch: datapath: fix data type in queue_gso_packets

2017-11-27 Thread Gustavo A. R. Silva


Quoting David Miller :


From: Willem de Bruijn 
Date: Sat, 25 Nov 2017 16:15:01 -0500


On Sat, Nov 25, 2017 at 2:14 PM, Gustavo A. R. Silva
 wrote:

gso_type is being used in binary AND operations together with SKB_GSO_UDP.
The issue is that variable gso_type is of type unsigned short and
SKB_GSO_UDP expands to more than 16 bits:

SKB_GSO_UDP = 1 << 16

this makes any binary AND operation between gso_type and SKB_GSO_UDP to
be always zero, hence making some code unreachable and likely causing
undesired behavior.

Fix this by changing the data type of variable gso_type to unsigned int.

Addresses-Coverity-ID: 1462223
Fixes: 0c19f846d582 ("net: accept UFO datagrams from tuntap and packet")
Signed-off-by: Gustavo A. R. Silva 


Acked-by: Willem de Bruijn 


Applied and I'll queued this up with Willem's changes for -stable.

Thanks!


Glad to help :)

Thanks
--
Gustavo A. R. Silva





___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 0/2] vHost Dequeue Zero Copy

2017-11-27 Thread Jan Scheurich
Hi Ciara,

> Thanks for your feedback. The limitation is only placed on phy port queues on 
> the VP (vhost -> phy) path. VV path and PV path are not
> affected.

Yes, you are right. VM to VM traffic is copied on transmit to the second VM.

> > I would much rather put a requirement on tenants that their virtio drivers
> > need to allocate enough virtio packet buffers if they want their VM to use
> > zero-copy vhostuser ports. Or is the critical resource  owned and managed by
> > Qemu and we'd need a patch on Qemu to overcome this limit?

Can you comment on that? Can a user also reduce the problem by configuring
a) a larger virtio Tx queue size (up to 1K) in Qemu, or
b) a larger mempool for packets in Tx direction inside the guest (driver?) 

> >
> > And what about increased packet drop risk due to shortened tx queues?
> 
> I guess this could be an issue. If I had some data to back this up I would 
> include it in the documentation and mention the risk.
> If the risk is unacceptable to the user they may choose to not enable the 
> feature. It's disabled by default so shouldn't introduce an issue for
> the standard case.

Yes, but it would be good to understand the potential drawback for a better 
judgement of the trade-off between better raw throughput and higher loss risk.

Regards, Jan
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Jan Scheurich
> > Conclusion: A global other_config parameter to enable/disable vhostuser
> > IOMMU is sufficient. By default this could be OFF for now and changed
> > to ON when broken Qemu versions are largely gone.

> [Mooney, Sean K] hi yes I just responded to this before I saw your reply.
> A global option will be fine if we can confirm that enabling the iommu 
> feature negotiation
> In ovs will not impact vms that do not have a virtualized iommu enabled in 
> the Libvirt/ qemu commandline.
> I would personally consider it a driver bug if ovs advertised support for 
> using a virtualized iommu and
> The driver in the guest requested it with our have an actual iommu present in 
> the vm.
> 

I agree. I think it is up to driver and Qemu VM to agree on whether it is OK to 
use vIOMMU in the first place. If OVS as vhostuser supports vIOMMU, it will be 
used, otherwise I would expect either of two things to happen:
1. Qemu falls back to vhostuser without IOMMU, or 
2. Qemu fails to start the VM.

My guess would be the latter, but I do not know. 

BR, Jan
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Mooney, Sean K


> -Original Message-
> From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
> Sent: Monday, November 27, 2017 2:15 PM
> To: Kavanagh, Mark B ; Mooney, Sean K
> ; Kevin Traynor ;
> d...@openvswitch.org
> Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck
> Baudin ; Ilya Maximets ;
> Stokes, Ian ; Loftus, Ciara
> ; Darrell Ball ; Aaron Conole
> 
> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
> IOMMU feature
> 
> > >> >> Hi Mark, All,
> > >> >>
> > >> >> I'm thinking about this and whether the current approach
> > >> >> provides more than what is actually needed by users at the cost
> > >> >> of making the user interface more complex.
> > >[Mooney, Sean K] I am personally split on this. To enable iommu
> > >support in openstack with the above interface we would have to do
> two
> > >things. 1 extend the image metatdata or flavor extra specs to allow
> > >requesting a vIOMMU. Second we would have to modify os-vif to
> produce
> > >the add-port command above. Using this interfaces gives us a nice
> > >parity in our api as we only enable iommu support in ovs if we
> enable
> > >for qemu. E.g. if the falvor/image does not request a virtualized
> > >iommu we wont declare its support in the options.
> > >> >>
> > >> >> As an alternative, how about having a global other_config (to
> be
> > >> >> set like vhost-socket-dir can be) for this instead of having to
> > >> >> set it for each individual interface. It would mean that it
> > >> >> would only have to be set once, instead of having this (ugly?!)
> > >> >> option every time a vhost port is added, so it's a less
> > >> >> intrusive change and I can't really think that a user would
> > >> >> require to do this per vhostclient
> > >[Mooney, Sean K]  well one taught that instantly comes to mind is if
> > >I set The global other_config option what happens if I do not
> request
> > >a iommu in qemu Or I have an old qemu.  If it would have any
> negative
> > >effect on network connectivity Or prevent the vm from functioning,
> > >that would require the nova scheduler to be Aware of which node had
> > >this option set and take that into account when placing vms.
> > >I assume if it had no negative effects  then we would not need a
> > >option, global or per Interface.
> > >> >> interface??? It's pain to have to add this at all for a bug in
> > >> >> QEMU and I'm sure in 1/2/3 years time someone will say that
> > >> >> users could still be using QEMU < 2.9.1 and we can't remove it,
> > >> >> so it would be nice to keep it as discreet as possible as we're
> > >> >> going to be stuck
> > >> with it for a while.
> > >> >>
> > >> >> I assume that a user would only use one version of QEMU at a
> > >> >> time
> > >> and
> > >> >> would either want or not want this feature. In the worst case,
> > >> >> if that proved completely wrong in the future, then a per
> > >> >> interface override could easily be added. Once there's a way to
> > >> >> maintain backwards compatibility (which there would be) I'd
> > >> >> rather err on the side of introducing just enough enough
> > >> >> functionality over increasing complexity for the user.
> > >> >>
> > >> >> What do you think?
> > >[Mooney, Sean K] I'm not sure that a single qemu version is a valid
> > >assumption to make.
> > >At least in an OpenStack context where rolling upgrades are a thing.
> > >But you are right That it will be less common however if it was no
> > >existent we would not have the issue with Live migration between
> > >nodes with different feature sets that is also trying to be
> addressed
> > >this Cycle. If we add a global config option for iommu support that
> > >is yet another thing that needs To be accounted for during live
> > >migration.
> > >> >>
> >
> > Hi Kevin, Jan,
> >
> > Any thoughts on Sean's concerns?
> >
> > I'll hold off on implementation until we have consensus.
> >
> > Thanks,
> > Mark
> 
> Here are my 2cts:
> 
> As I see it, vIOMMU for vhostuser is a pure infrastructure feature
> negotiated between guest driver,  Qemu and OVS. If both Qemu and OVS
> support it and the driver requests it, it should be used, otherwise
> not.
> 
> My understanding is that the vhostuser library in DPDK 17.11 supports
> vIOMMU, such that OVS could always signal support for this feature. The
> only reason not to do so is to work around the problem that Qemu claims
> vIOMMU support in certain releases but the vhostuser backend
> implementation is broken (prior to 2.9.1). For these cases it should be
> sufficient to globally disable vIOMMU support in OVS.
> 
> I don't see the need for one OVS instance to interwork with multiple
> different Qemu versions on the same host. And even if that were the
> case in an upgrade scenario, one could keep vIOMMU disabled until the
> old Qemu is removed.
> 
> The specific enabling of vIOMMU per tenant VM port is done by supplying
> an iommu device to the VM in the Libvirt XML and enabling iommu for the
> device in the driver element of the int

Re: [ovs-dev] [dpdk-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address when received first packet in vhost type

2017-11-27 Thread Yuanhan Liu
On Fri, Nov 24, 2017 at 05:59:09PM +0800, Chen Hailin wrote:
> Hi Aaron Conole && Jianfeng,
> 
> The stp could not work in ovs-dpdk vhostuser.
> Because the attached vhost device doesn't have MAC address.
> 
> Now we have two ways to solve this problem.
> 1. The vhost learns MAC address from packet like as my first patch.

I do agree with Aaron this is not the right way.

> 2. The virtio notifies MAC address actively to vhost user .

Unfortunately, AFAIK, there is no way to achieve that so far. we could
either let virtio/QEMU to expose the CQ to vhost or add a new VHOST_USER
message to carry the mac address. While vhost-user is a generic interface
adding a virtio-net specific message also doesn't seem quite right.
Exposing CQ is probably the best we can do.

Anyway, both need spec change.

--yliu
> 
> In my opinions,  if we treat it as a device,  we should allocate 
> MAC address for the device when the VM started.
> 
> Which one do you think better?
> 
> 
> 
> Best Regards,
> Chen Hailin
> che...@arraynetworks.com.cn
>  
> From: Aaron Conole
> Date: 2017-11-18 10:00
> To: Hailin Chen
> CC: ovs-dev@openvswitch.org; Maxime Coquelin; cl...@arraynetworks.com.cn
> Subject: Re: [ovs-dev] [PATCH RFC] netdev-dpdk: Fix device obtain mac address 
> when received first packet in vhost type
> Hi Hailin,
>  
> Hailin Chen  writes:
>  
> > The stp could not work on netdev-dpdk if network is loop.
> > Because the stp protocol negotiates designate port by sending
> > BPDU packets which contains MAC address.
> > However the device doesn't have MAC address in vhostuser type.
> > Thus, function send_bpdu_cb would not send BPDU packets.
> >
> > This patch will set the MAC for device when received first packet.
> >
> > Signed-off-by: Hailin Chen 
> > ---
>  
> Thanks for the patch.
>  
> In general, I don't think this is the right approach to deal with this
> type of issue.  I believe the problem statement is that OvS bridge is
> unaware of the guest MAC address - did I get it right?  In that case, I
> would think that a better way to solve this would be to have virtio tell
> the mac address of the guest.  I don't recall right now if that's
> allowed in the virtio spec, but I do remember some kind of negotiation
> features.
>  
> I've CC'd Maxime, who is one of the maintainers of the virtio code from
> DPDK side.  Perhaps there is an alternate way to solve this.
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Mooney, Sean K


> -Original Message-
> From: Kevin Traynor [mailto:ktray...@redhat.com]
> Sent: Monday, November 27, 2017 1:32 PM
> To: Kavanagh, Mark B ; Mooney, Sean K
> ; Jan Scheurich ;
> d...@openvswitch.org
> Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck
> Baudin ; Ilya Maximets ;
> Stokes, Ian ; Loftus, Ciara
> ; Darrell Ball ; Aaron Conole
> 
> Subject: Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
> IOMMU feature
> 
> On 11/27/2017 12:55 PM, Kavanagh, Mark B wrote:
> >
> >
> >> From: Mooney, Sean K
> >> Sent: Sunday, November 26, 2017 11:28 PM
> >> To: Kavanagh, Mark B ; Jan Scheurich
> >> ; Kevin Traynor ;
> >> d...@openvswitch.org
> >> Cc: maxime.coque...@redhat.com; Flavio Leitner ;
> >> Franck Baudin ; Ilya Maximets
> >> ; Stokes, Ian ;
> Loftus,
> >> Ciara ; Darrell Ball ;
> >> Aaron Conole ; Mooney, Sean K
> >> 
> >> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
> vhost
> >> IOMMU feature
> >>
> >>
> >>
> >>> -Original Message-
> >>> From: Kavanagh, Mark B
> >>> Sent: Friday, November 24, 2017 4:45 PM
> >>> To: Jan Scheurich ; Kevin Traynor
> >>> ; d...@openvswitch.org
> >>> Cc: maxime.coque...@redhat.com; Flavio Leitner ;
> >>> Franck Baudin ; Mooney, Sean K
> >>> ; Ilya Maximets ;
> >>> Stokes, Ian ; Loftus, Ciara
> >>> ; Darrell Ball ; Aaron
> >>> Conole 
> >>> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
> >>> vhost IOMMU feature
> >>>
> >>> Sounds good guys - I'll get cracking on this on Monday.
> >>>
> >>> Cheers,
> >>> Mark
> >>>
>  -Original Message-
>  From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
>  Sent: Friday, November 24, 2017 4:21 PM
>  To: Kevin Traynor ; Kavanagh, Mark B
>  ; d...@openvswitch.org
>  Cc: maxime.coque...@redhat.com; Flavio Leitner ;
> >>> Franck
>  Baudin ; Mooney, Sean K
>  ; Ilya Maximets ;
>  Stokes, Ian ; Loftus, Ciara
>  ;
> >>> Darrell
>  Ball ; Aaron Conole 
>  Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
>  vhost IOMMU feature
> 
>  +1
>  Jan
> 
> > -Original Message-
> > From: Kevin Traynor [mailto:ktray...@redhat.com]
> > Sent: Friday, 24 November, 2017 17:11
> > To: Mark Kavanagh ;
> d...@openvswitch.org
> > Cc: maxime.coque...@redhat.com; Flavio Leitner ;
> > Franck
>  Baudin ; Mooney, Sean K
> > ; Ilya Maximets
> ;
> > Ian
>  Stokes ; Loftus, Ciara
> > ; Darrell Ball ; Aaron
> > Conole
>  ; Jan Scheurich
> > 
> > Subject: Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
> >>> vhost
> > IOMMU
>  feature
> >
> > On 11/16/2017 11:01 AM, Mark Kavanagh wrote:
> >> DPDK v17.11 introduces support for the vHost IOMMU feature.
> >> This is a security feature, that restricts the vhost memory that
> >> a virtio device may access.
> >>
> >> This feature also enables the vhost REPLY_ACK protocol, the
> >> implementation of which is known to work in newer versions of
> >> QEMU (i.e. v2.10.0), but is buggy in older versions (v2.7.0 -
> >> v2.9.0, inclusive). As such, the feature is disabled by default
> >> in (and should remain so, for the aforementioned older QEMU
> verions).
> >> Starting with QEMU v2.9.1, vhost-iommu-support can safely be
> >> enabled, even without having an IOMMU device, with no
> performance
> >> penalty.
> >>
> >> This patch adds a new vhost port option, vhost-iommu-support, to
> >> allow enablement of the vhost IOMMU feature:
> >>
> >> $ ovs-vsctl add-port br0 vhost-client-1 \
> >> -- set Interface vhost-client-1 type=dpdkvhostuserclient
> \
> >>  options:vhost-server-path=$VHOST_USER_SOCKET_PATH
> \
> >>  options:vhost-iommu-support=true
> >>
> >
> > Hi Mark, All,
> >
> > I'm thinking about this and whether the current approach provides
> > more than what is actually needed by users at the cost of making
> > the user interface more complex.
> >> [Mooney, Sean K] I am personally split on this. To enable iommu
> >> support in openstack with the above interface we would have to do
> two
> >> things. 1 extend the image metatdata or flavor extra specs to allow
> >> requesting a vIOMMU. Second we would have to modify os-vif to
> produce
> >> the add-port command above. Using this interfaces gives us a nice
> >> parity in our api as we only enable iommu support in ovs if we
> enable
> >> for qemu. E.g. if the falvor/image does not request a virtualized
> >> iommu we wont declare its support in the options.
> >
> > As an alternative, how about having a global other_config (to be
> > set like vhost-socket-dir can be) for this instead of having to
> > set it for each individual interface. It would mean that it would
> > only have to be set once, instead of having this (ugly?!) option
> > every time a vhost port is added, so it's

Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Kavanagh, Mark B
>From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
>Sent: Monday, November 27, 2017 2:15 PM
>To: Kavanagh, Mark B ; Mooney, Sean K
>; Kevin Traynor ;
>d...@openvswitch.org
>Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck Baudin
>; Ilya Maximets ; Stokes, Ian
>; Loftus, Ciara ; Darrell Ball
>; Aaron Conole 
>Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU
>feature
>
>> >> >> Hi Mark, All,
>> >> >>
>> >> >> I'm thinking about this and whether the current approach provides
>> >> >> more than what is actually needed by users at the cost of making the
>> >> >> user interface more complex.
>> >[Mooney, Sean K] I am personally split on this. To enable iommu support in
>> >openstack with the above interface we would have to do two things. 1 extend
>> >the image metatdata
>> >or flavor extra specs to allow requesting a vIOMMU. Second we would have to
>> >modify os-vif to produce
>> >the add-port command above. Using this interfaces gives us a nice parity in
>> >our api
>> >as we only enable iommu support in ovs if we enable for qemu. E.g. if the
>> >falvor/image does not request
>> >a virtualized iommu we wont declare its support in the options.
>> >> >>
>> >> >> As an alternative, how about having a global other_config (to be set
>> >> >> like vhost-socket-dir can be) for this instead of having to set it
>> >> >> for each individual interface. It would mean that it would only have
>> >> >> to be set once, instead of having this (ugly?!) option every time a
>> >> >> vhost port is added, so it's a less intrusive change and I can't
>> >> >> really think that a user would require to do this per vhostclient
>> >[Mooney, Sean K]  well one taught that instantly comes to mind is if I set
>> >The global other_config option what happens if I do not request a iommu in
>> >qemu
>> >Or I have an old qemu.  If it would have any negative effect on network
>> >connectivity
>> >Or prevent the vm from functioning, that would require the nova scheduler
>to
>> >be
>> >Aware of which node had this option set and take that into account when
>> >placing vms.
>> >I assume if it had no negative effects  then we would not need a option,
>> >global or per
>> >Interface.
>> >> >> interface??? It's pain to have to add this at all for a bug in QEMU
>> >> >> and I'm sure in 1/2/3 years time someone will say that users could
>> >> >> still be using QEMU < 2.9.1 and we can't remove it, so it would be
>> >> >> nice to keep it as discreet as possible as we're going to be stuck
>> >> with it for a while.
>> >> >>
>> >> >> I assume that a user would only use one version of QEMU at a time
>> >> and
>> >> >> would either want or not want this feature. In the worst case, if
>> >> >> that proved completely wrong in the future, then a per interface
>> >> >> override could easily be added. Once there's a way to maintain
>> >> >> backwards compatibility (which there would be) I'd rather err on the
>> >> >> side of introducing just enough enough functionality over increasing
>> >> >> complexity for the user.
>> >> >>
>> >> >> What do you think?
>> >[Mooney, Sean K] I'm not sure that a single qemu version is a valid
>assumption
>> >to make.
>> >At least in an OpenStack context where rolling upgrades are a thing. But
>you
>> >are right
>> >That it will be less common however if it was no existent we would not have
>> >the issue with
>> >Live migration between nodes with different feature sets that is also
>trying
>> >to be addressed this
>> >Cycle. If we add a global config option for iommu support that is yet
>another
>> >thing that needs
>> >To be accounted for during live migration.
>> >> >>
>>
>> Hi Kevin, Jan,
>>
>> Any thoughts on Sean's concerns?
>>
>> I'll hold off on implementation until we have consensus.
>>
>> Thanks,
>> Mark
>
>Here are my 2cts:
>
>As I see it, vIOMMU for vhostuser is a pure infrastructure feature negotiated
>between guest driver,  Qemu and OVS. If both Qemu and OVS support it and the
>driver requests it, it should be used, otherwise not.
>
>My understanding is that the vhostuser library in DPDK 17.11 supports vIOMMU,
>such that OVS could always signal support for this feature. The only reason
>not to do so is to work around the problem that Qemu claims vIOMMU support in
>certain releases but the vhostuser backend implementation is broken (prior to
>2.9.1). For these cases it should be sufficient to globally disable vIOMMU
>support in OVS.
>
>I don't see the need for one OVS instance to interwork with multiple different
>Qemu versions on the same host. And even if that were the case in an upgrade
>scenario, one could keep vIOMMU disabled until the old Qemu is removed.
>
>The specific enabling of vIOMMU per tenant VM port is done by supplying an
>iommu device to the VM in the Libvirt XML and enabling iommu for the device in
>the driver element of the interface section. These are the places that
>Nova/OpenStack would use to control the vIOMMU per VM based on image
>properties or flavor extra 

Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Jan Scheurich
> >> >> Hi Mark, All,
> >> >>
> >> >> I'm thinking about this and whether the current approach provides
> >> >> more than what is actually needed by users at the cost of making the
> >> >> user interface more complex.
> >[Mooney, Sean K] I am personally split on this. To enable iommu support in
> >openstack with the above interface we would have to do two things. 1 extend
> >the image metatdata
> >or flavor extra specs to allow requesting a vIOMMU. Second we would have to
> >modify os-vif to produce
> >the add-port command above. Using this interfaces gives us a nice parity in
> >our api
> >as we only enable iommu support in ovs if we enable for qemu. E.g. if the
> >falvor/image does not request
> >a virtualized iommu we wont declare its support in the options.
> >> >>
> >> >> As an alternative, how about having a global other_config (to be set
> >> >> like vhost-socket-dir can be) for this instead of having to set it
> >> >> for each individual interface. It would mean that it would only have
> >> >> to be set once, instead of having this (ugly?!) option every time a
> >> >> vhost port is added, so it's a less intrusive change and I can't
> >> >> really think that a user would require to do this per vhostclient
> >[Mooney, Sean K]  well one taught that instantly comes to mind is if I set
> >The global other_config option what happens if I do not request a iommu in
> >qemu
> >Or I have an old qemu.  If it would have any negative effect on network
> >connectivity
> >Or prevent the vm from functioning, that would require the nova scheduler to
> >be
> >Aware of which node had this option set and take that into account when
> >placing vms.
> >I assume if it had no negative effects  then we would not need a option,
> >global or per
> >Interface.
> >> >> interface??? It's pain to have to add this at all for a bug in QEMU
> >> >> and I'm sure in 1/2/3 years time someone will say that users could
> >> >> still be using QEMU < 2.9.1 and we can't remove it, so it would be
> >> >> nice to keep it as discreet as possible as we're going to be stuck
> >> with it for a while.
> >> >>
> >> >> I assume that a user would only use one version of QEMU at a time
> >> and
> >> >> would either want or not want this feature. In the worst case, if
> >> >> that proved completely wrong in the future, then a per interface
> >> >> override could easily be added. Once there's a way to maintain
> >> >> backwards compatibility (which there would be) I'd rather err on the
> >> >> side of introducing just enough enough functionality over increasing
> >> >> complexity for the user.
> >> >>
> >> >> What do you think?
> >[Mooney, Sean K] I'm not sure that a single qemu version is a valid 
> >assumption
> >to make.
> >At least in an OpenStack context where rolling upgrades are a thing. But you
> >are right
> >That it will be less common however if it was no existent we would not have
> >the issue with
> >Live migration between nodes with different feature sets that is also trying
> >to be addressed this
> >Cycle. If we add a global config option for iommu support that is yet another
> >thing that needs
> >To be accounted for during live migration.
> >> >>
> 
> Hi Kevin, Jan,
> 
> Any thoughts on Sean's concerns?
> 
> I'll hold off on implementation until we have consensus.
> 
> Thanks,
> Mark

Here are my 2cts:

As I see it, vIOMMU for vhostuser is a pure infrastructure feature negotiated 
between guest driver,  Qemu and OVS. If both Qemu and OVS support it and the 
driver requests it, it should be used, otherwise not.

My understanding is that the vhostuser library in DPDK 17.11 supports vIOMMU, 
such that OVS could always signal support for this feature. The only reason not 
to do so is to work around the problem that Qemu claims vIOMMU support in 
certain releases but the vhostuser backend implementation is broken (prior to 
2.9.1). For these cases it should be sufficient to globally disable vIOMMU 
support in OVS.

I don't see the need for one OVS instance to interwork with multiple different 
Qemu versions on the same host. And even if that were the case in an upgrade 
scenario, one could keep vIOMMU disabled until the old Qemu is removed.

The specific enabling of vIOMMU per tenant VM port is done by supplying an 
iommu device to the VM in the Libvirt XML and enabling iommu for the device in 
the driver element of the interface section. These are the places that 
Nova/OpenStack would use to control the vIOMMU per VM based on image properties 
or flavor extra specs. I do not see any value to additionally configure the 
iommu support per vhostuser port through OS-VIF.

Conclusion: A global other_config parameter to enable/disable vhostuser IOMMU 
is sufficient. By default this could be OFF for now and changed to ON when 
broken Qemu versions are largely gone.

BR, Jan

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] tc: Fix build breakage on GCC 7 by annotating fall-through.

2017-11-27 Thread Paul Blakey

Acked-by: Paul Blakey 


On 27/11/2017 02:26, Ben Pfaff wrote:

Open vSwitch enables the GCC 7+ option that warns about fall-through
switch statements.  This commit fixes newly introduced warnings.

CC: Paul Blakey 
Fixes: d6118e628988 ("netdev-tc-offloads: Verify csum flags on dump from tc")
Signed-off-by: Ben Pfaff 
---
  lib/tc.c | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/lib/tc.c b/lib/tc.c
index d3a031237ab1..bfe20ce1a3bd 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -1250,6 +1250,7 @@ csum_update_flag(struct tc_flower *flower,
  switch (htype) {
  case TCA_PEDIT_KEY_EX_HDR_TYPE_IP4:
  flower->csum_update_flags |= TCA_CSUM_UPDATE_FLAG_IPV4HDR;
+/* Fall through. */
  case TCA_PEDIT_KEY_EX_HDR_TYPE_IP6:
  case TCA_PEDIT_KEY_EX_HDR_TYPE_TCP:
  case TCA_PEDIT_KEY_EX_HDR_TYPE_UDP:
@@ -1269,6 +1270,7 @@ csum_update_flag(struct tc_flower *flower,
   flower->key.ip_proto);
  break;
  }
+/* Fall through. */
  case TCA_PEDIT_KEY_EX_HDR_TYPE_ETH:
  return 0; /* success */
  


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Kevin Traynor
On 11/27/2017 12:55 PM, Kavanagh, Mark B wrote:
> 
> 
>> From: Mooney, Sean K
>> Sent: Sunday, November 26, 2017 11:28 PM
>> To: Kavanagh, Mark B ; Jan Scheurich
>> ; Kevin Traynor ;
>> d...@openvswitch.org
>> Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck 
>> Baudin
>> ; Ilya Maximets ; Stokes, Ian
>> ; Loftus, Ciara ; Darrell Ball
>> ; Aaron Conole ; Mooney, Sean K
>> 
>> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU
>> feature
>>
>>
>>
>>> -Original Message-
>>> From: Kavanagh, Mark B
>>> Sent: Friday, November 24, 2017 4:45 PM
>>> To: Jan Scheurich ; Kevin Traynor
>>> ; d...@openvswitch.org
>>> Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck
>>> Baudin ; Mooney, Sean K ;
>>> Ilya Maximets ; Stokes, Ian
>>> ; Loftus, Ciara ; Darrell
>>> Ball ; Aaron Conole 
>>> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
>>> IOMMU feature
>>>
>>> Sounds good guys - I'll get cracking on this on Monday.
>>>
>>> Cheers,
>>> Mark
>>>
 -Original Message-
 From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
 Sent: Friday, November 24, 2017 4:21 PM
 To: Kevin Traynor ; Kavanagh, Mark B
 ; d...@openvswitch.org
 Cc: maxime.coque...@redhat.com; Flavio Leitner ;
>>> Franck
 Baudin ; Mooney, Sean K ;
 Ilya Maximets ; Stokes, Ian
 ; Loftus, Ciara ;
>>> Darrell
 Ball ; Aaron Conole 
 Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
 IOMMU feature

 +1
 Jan

> -Original Message-
> From: Kevin Traynor [mailto:ktray...@redhat.com]
> Sent: Friday, 24 November, 2017 17:11
> To: Mark Kavanagh ; d...@openvswitch.org
> Cc: maxime.coque...@redhat.com; Flavio Leitner ;
> Franck
 Baudin ; Mooney, Sean K
> ; Ilya Maximets ;
> Ian
 Stokes ; Loftus, Ciara
> ; Darrell Ball ; Aaron
> Conole
 ; Jan Scheurich
> 
> Subject: Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
>>> vhost
> IOMMU
 feature
>
> On 11/16/2017 11:01 AM, Mark Kavanagh wrote:
>> DPDK v17.11 introduces support for the vHost IOMMU feature.
>> This is a security feature, that restricts the vhost memory that a
>> virtio device may access.
>>
>> This feature also enables the vhost REPLY_ACK protocol, the
>> implementation of which is known to work in newer versions of QEMU
>> (i.e. v2.10.0), but is buggy in older versions (v2.7.0 - v2.9.0,
>> inclusive). As such, the feature is disabled by default in (and
>> should remain so, for the aforementioned older QEMU verions).
>> Starting with QEMU v2.9.1, vhost-iommu-support can safely be
>> enabled, even without having an IOMMU device, with no performance
>> penalty.
>>
>> This patch adds a new vhost port option, vhost-iommu-support, to
>> allow enablement of the vhost IOMMU feature:
>>
>> $ ovs-vsctl add-port br0 vhost-client-1 \
>> -- set Interface vhost-client-1 type=dpdkvhostuserclient \
>>  options:vhost-server-path=$VHOST_USER_SOCKET_PATH   \
>>  options:vhost-iommu-support=true
>>
>
> Hi Mark, All,
>
> I'm thinking about this and whether the current approach provides
> more than what is actually needed by users at the cost of making the
> user interface more complex.
>> [Mooney, Sean K] I am personally split on this. To enable iommu support in
>> openstack with the above interface we would have to do two things. 1 extend
>> the image metatdata
>> or flavor extra specs to allow requesting a vIOMMU. Second we would have to
>> modify os-vif to produce
>> the add-port command above. Using this interfaces gives us a nice parity in
>> our api
>> as we only enable iommu support in ovs if we enable for qemu. E.g. if the
>> falvor/image does not request
>> a virtualized iommu we wont declare its support in the options.
>
> As an alternative, how about having a global other_config (to be set
> like vhost-socket-dir can be) for this instead of having to set it
> for each individual interface. It would mean that it would only have
> to be set once, instead of having this (ugly?!) option every time a
> vhost port is added, so it's a less intrusive change and I can't
> really think that a user would require to do this per vhostclient
>> [Mooney, Sean K]  well one taught that instantly comes to mind is if I set
>> The global other_config option what happens if I do not request a iommu in
>> qemu
>> Or I have an old qemu.  If it would have any negative effect on network
>> connectivity
>> Or prevent the vm from functioning, that would require the nova scheduler to
>> be
>> Aware of which node had this option set and take that into account when
>> placing vms.
>> I assume if it had no negative effects  then we would not need a option,
>> global or per
>> Interface.
> interface??? It's pain to have to add this at all 

Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU feature

2017-11-27 Thread Kavanagh, Mark B


>From: Mooney, Sean K
>Sent: Sunday, November 26, 2017 11:28 PM
>To: Kavanagh, Mark B ; Jan Scheurich
>; Kevin Traynor ;
>d...@openvswitch.org
>Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck Baudin
>; Ilya Maximets ; Stokes, Ian
>; Loftus, Ciara ; Darrell Ball
>; Aaron Conole ; Mooney, Sean K
>
>Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost IOMMU
>feature
>
>
>
>> -Original Message-
>> From: Kavanagh, Mark B
>> Sent: Friday, November 24, 2017 4:45 PM
>> To: Jan Scheurich ; Kevin Traynor
>> ; d...@openvswitch.org
>> Cc: maxime.coque...@redhat.com; Flavio Leitner ; Franck
>> Baudin ; Mooney, Sean K ;
>> Ilya Maximets ; Stokes, Ian
>> ; Loftus, Ciara ; Darrell
>> Ball ; Aaron Conole 
>> Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
>> IOMMU feature
>>
>> Sounds good guys - I'll get cracking on this on Monday.
>>
>> Cheers,
>> Mark
>>
>> >-Original Message-
>> >From: Jan Scheurich [mailto:jan.scheur...@ericsson.com]
>> >Sent: Friday, November 24, 2017 4:21 PM
>> >To: Kevin Traynor ; Kavanagh, Mark B
>> >; d...@openvswitch.org
>> >Cc: maxime.coque...@redhat.com; Flavio Leitner ;
>> Franck
>> >Baudin ; Mooney, Sean K ;
>> >Ilya Maximets ; Stokes, Ian
>> >; Loftus, Ciara ;
>> Darrell
>> >Ball ; Aaron Conole 
>> >Subject: RE: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for vhost
>> >IOMMU feature
>> >
>> >+1
>> >Jan
>> >
>> >> -Original Message-
>> >> From: Kevin Traynor [mailto:ktray...@redhat.com]
>> >> Sent: Friday, 24 November, 2017 17:11
>> >> To: Mark Kavanagh ; d...@openvswitch.org
>> >> Cc: maxime.coque...@redhat.com; Flavio Leitner ;
>> >> Franck
>> >Baudin ; Mooney, Sean K
>> >> ; Ilya Maximets ;
>> >> Ian
>> >Stokes ; Loftus, Ciara
>> >> ; Darrell Ball ; Aaron
>> >> Conole
>> >; Jan Scheurich
>> >> 
>> >> Subject: Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: add support for
>> vhost
>> >> IOMMU
>> >feature
>> >>
>> >> On 11/16/2017 11:01 AM, Mark Kavanagh wrote:
>> >> > DPDK v17.11 introduces support for the vHost IOMMU feature.
>> >> > This is a security feature, that restricts the vhost memory that a
>> >> > virtio device may access.
>> >> >
>> >> > This feature also enables the vhost REPLY_ACK protocol, the
>> >> > implementation of which is known to work in newer versions of QEMU
>> >> > (i.e. v2.10.0), but is buggy in older versions (v2.7.0 - v2.9.0,
>> >> > inclusive). As such, the feature is disabled by default in (and
>> >> > should remain so, for the aforementioned older QEMU verions).
>> >> > Starting with QEMU v2.9.1, vhost-iommu-support can safely be
>> >> > enabled, even without having an IOMMU device, with no performance
>> >> > penalty.
>> >> >
>> >> > This patch adds a new vhost port option, vhost-iommu-support, to
>> >> > allow enablement of the vhost IOMMU feature:
>> >> >
>> >> > $ ovs-vsctl add-port br0 vhost-client-1 \
>> >> > -- set Interface vhost-client-1 type=dpdkvhostuserclient \
>> >> >  options:vhost-server-path=$VHOST_USER_SOCKET_PATH   \
>> >> >  options:vhost-iommu-support=true
>> >> >
>> >>
>> >> Hi Mark, All,
>> >>
>> >> I'm thinking about this and whether the current approach provides
>> >> more than what is actually needed by users at the cost of making the
>> >> user interface more complex.
>[Mooney, Sean K] I am personally split on this. To enable iommu support in
>openstack with the above interface we would have to do two things. 1 extend
>the image metatdata
>or flavor extra specs to allow requesting a vIOMMU. Second we would have to
>modify os-vif to produce
>the add-port command above. Using this interfaces gives us a nice parity in
>our api
>as we only enable iommu support in ovs if we enable for qemu. E.g. if the
>falvor/image does not request
>a virtualized iommu we wont declare its support in the options.
>> >>
>> >> As an alternative, how about having a global other_config (to be set
>> >> like vhost-socket-dir can be) for this instead of having to set it
>> >> for each individual interface. It would mean that it would only have
>> >> to be set once, instead of having this (ugly?!) option every time a
>> >> vhost port is added, so it's a less intrusive change and I can't
>> >> really think that a user would require to do this per vhostclient
>[Mooney, Sean K]  well one taught that instantly comes to mind is if I set
>The global other_config option what happens if I do not request a iommu in
>qemu
>Or I have an old qemu.  If it would have any negative effect on network
>connectivity
>Or prevent the vm from functioning, that would require the nova scheduler to
>be
>Aware of which node had this option set and take that into account when
>placing vms.
>I assume if it had no negative effects  then we would not need a option,
>global or per
>Interface.
>> >> interface??? It's pain to have to add this at all for a bug in QEMU
>> >> and I'm sure in 1/2/3 years time someone will say that users could
>> >> still be using QEMU < 2.9.1 and we can't remove 

Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor dp_netdev_pmd_thread structure."

2017-11-27 Thread Ilya Maximets
On 24.11.2017 19:08, Bodireddy, Bhanuprakash wrote:
>> On 22.11.2017 20:14, Bodireddy, Bhanuprakash wrote:
 This reverts commit a807c15796ddc43ba1ffb2a6b0bd2ad4e2b73941.

 Padding and aligning of dp_netdev_pmd_thread structure members is
 useless, broken in a several ways and only greatly degrades
 maintainability and extensibility of the structure.
>>>
>>> The idea of my earlier patch was to mark the cache lines and reduce the
>> holes while still maintaining the grouping of related members in this 
>> structure.
>>
>> Some of the grouping aspects looks strange. For example, it looks illogical 
>> that
>> 'exit_latch' is grouped with 'flow_table' but not the 'reload_seq' and other
>> reload related stuff. It looks strange that statistics and counters spread 
>> across
>> different groups. So, IMHO, it's not well grouped.
> 
> I had to strike a fine balance and some members may be placed in a different 
> group
> due to their sizes and importance. Let me think if I can make it better.
> 
>>
>>> Also cache line marking is a good practice to make some one extra cautious
>> when extending or editing important structures .
>>> Most importantly I was experimenting with prefetching on this structure
>> and needed cache line markers for it.
>>>
>>> I see that you are on ARM (I don't have HW to test) and want to know if this
>> commit has any negative affect and any numbers would be appreciated.
>>
>> Basic VM-VM testing shows stable 0.5% perfromance improvement with
>> revert applied.
> 
> I did P2P, PVP and PVVP with IXIA and haven't noticed any drop on X86.  
> 
>> Padding adds 560 additional bytes of holes.
> As the cache line in ARM is 128 , it created holes, I can find a workaround 
> to handle this.
> 
>>
>>> More comments inline.
>>>

 Issues:

1. It's not working because all the instances of struct
   dp_netdev_pmd_thread allocated only by usual malloc. All the
   memory is not aligned to cachelines -> structure almost never
   starts at aligned memory address. This means that any further
   paddings and alignments inside the structure are completely
   useless. Fo example:

   Breakpoint 1, pmd_thread_main
   (gdb) p pmd
   $49 = (struct dp_netdev_pmd_thread *) 0x1b1af20
   (gdb) p &pmd->cacheline1
   $51 = (OVS_CACHE_LINE_MARKER *) 0x1b1af60
   (gdb) p &pmd->cacheline0
   $52 = (OVS_CACHE_LINE_MARKER *) 0x1b1af20
   (gdb) p &pmd->flow_cache
   $53 = (struct emc_cache *) 0x1b1afe0

   All of the above addresses shifted from cacheline start by 32B.
>>>
>>> If you see below all the addresses are 64 byte aligned.
>>>
>>> (gdb) p pmd
>>> $1 = (struct dp_netdev_pmd_thread *) 0x7fc1e9b1a040
>>> (gdb) p &pmd->cacheline0
>>> $2 = (OVS_CACHE_LINE_MARKER *) 0x7fc1e9b1a040
>>> (gdb) p &pmd->cacheline1
>>> $3 = (OVS_CACHE_LINE_MARKER *) 0x7fc1e9b1a080
>>> (gdb) p &pmd->flow_cache
>>> $4 = (struct emc_cache *) 0x7fc1e9b1a0c0
>>> (gdb) p &pmd->flow_table
>>> $5 = (struct cmap *) 0x7fc1e9fba100
>>> (gdb) p &pmd->stats
>>> $6 = (struct dp_netdev_pmd_stats *) 0x7fc1e9fba140
>>> (gdb) p &pmd->port_mutex
>>> $7 = (struct ovs_mutex *) 0x7fc1e9fba180
>>> (gdb) p &pmd->poll_list
>>> $8 = (struct hmap *) 0x7fc1e9fba1c0
>>> (gdb) p &pmd->tnl_port_cache
>>> $9 = (struct hmap *) 0x7fc1e9fba200
>>> (gdb) p &pmd->stats_zero
>>> $10 = (unsigned long long (*)[5]) 0x7fc1e9fba240
>>>
>>> I tried using xzalloc_cacheline instead of default xzalloc() here.  I
>>> tried tens of times and always found that the address is
>>> 64 byte aligned and it should start at the beginning of cache line on X86.
>>> Not sure why the comment  " (The memory returned will not be at the start
>> of  a cache line, though, so don't assume such alignment.)" says otherwise?
>>
>> Yes, you will always get aligned addressess on your x86 Linux system that
>> supports
>> posix_memalign() call. The comment says what it says because it will make
>> some memory allocation tricks in case posix_memalign() is not available
>> (Windows, some MacOS, maybe some Linux systems (not sure)) and the
>> address will not be aligned it this case.
> 
> I also verified the other case when posix_memalign isn't available and even 
> in that case
> it returns the address aligned on CACHE_LINE_SIZE boundary. I will send out a 
> patch to use
>  xzalloc_cacheline for allocating the memory.

I don't know how you tested this, because it is impossible:

1. OVS allocates some memory:
base = xmalloc(...);

2. Rounds it up to the cache line start:
payload = (void **) ROUND_UP((uintptr_t) base, CACHE_LINE_SIZE);

3. Returns the pointer increased by 8 bytes:
return (char *) payload + MEM_ALIGN;

So, unless you redefined MEM_ALIGN to zero, you will never get aligned memory
address while allocating by xmalloc_cacheline() on system without 
posix_memalign().


> 
>

[ovs-dev] [PATCH] openvswitch: use ktime_get_ts64() instead of ktime_get_ts()

2017-11-27 Thread Arnd Bergmann
timespec is deprecated because of the y2038 overflow, so let's convert
this one to ktime_get_ts64(). The code is already safe even on 32-bit
architectures, since it uses monotonic times. On 64-bit architectures,
nothing changes, while on 32-bit architectures this avoids one
type conversion.

Signed-off-by: Arnd Bergmann 
---
 net/openvswitch/flow.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index dbe2379329c5..76d050aba7a4 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -56,12 +56,12 @@
 
 u64 ovs_flow_used_time(unsigned long flow_jiffies)
 {
-   struct timespec cur_ts;
+   struct timespec64 cur_ts;
u64 cur_ms, idle_ms;
 
-   ktime_get_ts(&cur_ts);
+   ktime_get_ts64(&cur_ts);
idle_ms = jiffies_to_msecs(jiffies - flow_jiffies);
-   cur_ms = (u64)cur_ts.tv_sec * MSEC_PER_SEC +
+   cur_ms = (u64)(u32)cur_ts.tv_sec * MSEC_PER_SEC +
 cur_ts.tv_nsec / NSEC_PER_MSEC;
 
return cur_ms - idle_ms;
-- 
2.9.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor dp_netdev_pmd_thread structure."

2017-11-27 Thread Jan Scheurich
I agree with Ilya here. Adding theses cache line markers and re-grouping 
variables to minimize gaps in cache lines is creating a maintenance burden 
without any tangible benefit. I have had to go through the pain of refactoring 
my PMD Performance Metrics patch to the new dp_netdev_pmd_thread struct and 
spent a lot of time to analyze the actual memory layout with GDB and play 
Tetris with the variables.

There will never be more than a handful of PMDs, so minimizing the gaps does 
not matter from memory perspective. And whether the individual members occupy 4 
or 5 cache lines does not matter either compared to the many hundred cache 
lines touched for EMC and DPCLS lookups of an Rx batch. And any optimization 
done for x86 is not necessarily optimal for other architectures.

Finally, even for x86 there is not even a performance improvement. I re-ran our 
standard L3VPN over VXLAN performance PVP test on master and with Ilya's revert 
patch:

Flows   master  reverted
8,  4.464.48
100,4.274.29
1000,   4.074.07
2000,   3.683.68
5000,   3.033.03
1,  2.762.77
2,  2.642.65
5,  2.602.61
10, 2.602.61
50, 2.602.61

All in all, I support reverting this change.

Regards, Jan

Acked-by: Jan Scheurich 

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org 
> [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of Bodireddy, Bhanuprakash
> Sent: Friday, 24 November, 2017 17:09
> To: Ilya Maximets ; ovs-dev@openvswitch.org; Ben 
> Pfaff 
> Cc: Heetae Ahn 
> Subject: Re: [ovs-dev] [PATCH] Revert "dpif_netdev: Refactor 
> dp_netdev_pmd_thread structure."
> 
> >On 22.11.2017 20:14, Bodireddy, Bhanuprakash wrote:
> >>> This reverts commit a807c15796ddc43ba1ffb2a6b0bd2ad4e2b73941.
> >>>
> >>> Padding and aligning of dp_netdev_pmd_thread structure members is
> >>> useless, broken in a several ways and only greatly degrades
> >>> maintainability and extensibility of the structure.
> >>
> >> The idea of my earlier patch was to mark the cache lines and reduce the
> >holes while still maintaining the grouping of related members in this 
> >structure.
> >
> >Some of the grouping aspects looks strange. For example, it looks illogical 
> >that
> >'exit_latch' is grouped with 'flow_table' but not the 'reload_seq' and other
> >reload related stuff. It looks strange that statistics and counters spread 
> >across
> >different groups. So, IMHO, it's not well grouped.
> 
> I had to strike a fine balance and some members may be placed in a different 
> group
> due to their sizes and importance. Let me think if I can make it better.
> 
> >
> >> Also cache line marking is a good practice to make some one extra cautious
> >when extending or editing important structures .
> >> Most importantly I was experimenting with prefetching on this structure
> >and needed cache line markers for it.
> >>
> >> I see that you are on ARM (I don't have HW to test) and want to know if 
> >> this
> >commit has any negative affect and any numbers would be appreciated.
> >
> >Basic VM-VM testing shows stable 0.5% perfromance improvement with
> >revert applied.
> 
> I did P2P, PVP and PVVP with IXIA and haven't noticed any drop on X86.
> 
> >Padding adds 560 additional bytes of holes.
> As the cache line in ARM is 128 , it created holes, I can find a workaround 
> to handle this.
> 
> >
> >> More comments inline.
> >>
> >>>
> >>> Issues:
> >>>
> >>>1. It's not working because all the instances of struct
> >>>   dp_netdev_pmd_thread allocated only by usual malloc. All the
> >>>   memory is not aligned to cachelines -> structure almost never
> >>>   starts at aligned memory address. This means that any further
> >>>   paddings and alignments inside the structure are completely
> >>>   useless. Fo example:
> >>>
> >>>   Breakpoint 1, pmd_thread_main
> >>>   (gdb) p pmd
> >>>   $49 = (struct dp_netdev_pmd_thread *) 0x1b1af20
> >>>   (gdb) p &pmd->cacheline1
> >>>   $51 = (OVS_CACHE_LINE_MARKER *) 0x1b1af60
> >>>   (gdb) p &pmd->cacheline0
> >>>   $52 = (OVS_CACHE_LINE_MARKER *) 0x1b1af20
> >>>   (gdb) p &pmd->flow_cache
> >>>   $53 = (struct emc_cache *) 0x1b1afe0
> >>>
> >>>   All of the above addresses shifted from cacheline start by 32B.
> >>
> >> If you see below all the addresses are 64 byte aligned.
> >>
> >> (gdb) p pmd
> >> $1 = (struct dp_netdev_pmd_thread *) 0x7fc1e9b1a040
> >> (gdb) p &pmd->cacheline0
> >> $2 = (OVS_CACHE_LINE_MARKER *) 0x7fc1e9b1a040
> >> (gdb) p &pmd->cacheline1
> >> $3 = (OVS_CACHE_LINE_MARKER *) 0x7fc1e9b1a080
> >> (gdb) p &pmd->flow_cache
> >> $4 = (struct emc_cache *) 0x7fc1e9b1a0c0
> >> (gdb) p &pmd->flow_table
> >> $5 = (struct cmap *) 0x7fc1e9fba100
> >> (gdb) p &pmd->stats
> >> $6 = (struct dp_netdev_pmd_stats *) 0x7fc1e9fba140
> >> (gdb) p &pmd->port_mutex
> >> $7 = (struct ovs_mutex *) 0x7fc1e9fba180
> >> (gdb) p &pmd->poll_list
> >> $8 = (struct hmap *) 0x7fc1e9fba1c0
> >> (gdb) p &

Re: [ovs-dev] In ovs-2.4 version, why can't stop dpdk-bond port by `ovs-ofctl mod br dpdkbond DOWN`?

2017-11-27 Thread Sam
The "dev->port_id" is bond port id, should I use slave port id instead ?

2017-11-27 18:07 GMT+08:00 Sam :

> Hi all,
>
> I'm working in ovs 2.4 version, and I use "dpdkb" type netdev. Then I
> call  `ovs-ofctl mod br dpdkbond DOWN` to down this device, at last, my
> code call "rte_eth_dev_stop(dev->port_id);".
>
> But counter shows the port is still rx/tx, why?
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] In ovs-2.4 version, why can't stop dpdk-bond port by `ovs-ofctl mod br dpdkbond DOWN`?

2017-11-27 Thread Sam
Hi all,

I'm working in ovs 2.4 version, and I use "dpdkb" type netdev. Then I
call  `ovs-ofctl mod br dpdkbond DOWN` to down this device, at last, my
code call "rte_eth_dev_stop(dev->port_id);".

But counter shows the port is still rx/tx, why?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Failed to trace path into open flow groups

2017-11-27 Thread Yongsheng Gong
Hi,

Ovs version 2.5.2:
Os: ubuntu 16.04

I have flows which in open flow groups, and I want to trace the path of one 
kind of flow to know how it is got to a certain open flow port:

Below is my actions in groups:

gongysh@ubuntu16:~/routing$ sudo ovs-ofctl -O OpenFlow13 dump-groups s204
OFPST_GROUP_DESC reply (OF1.3) (xid=0x2):
 
group_id=536870968,type=indirect,bucket=actions=set_field:00:aa:00:00:00:02->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:4116->vlan_vid,group:1310724
 
group_id=536870965,type=indirect,bucket=actions=set_field:00:aa:00:00:00:01->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:4116->vlan_vid,group:1310723
 
group_id=2449473560,type=indirect,bucket=actions=pop_vlan,push_mpls:0x8847,set_field:215->mpls_label,group:2415919127,push_vlan:0x8100,set_field:8191->vlan_vid
 group_id=3221225472,type=indirect,bucket=actions=pop_vlan,CONTROLLER:65535
 
group_id=1076363264,type=all,bucket=actions=group:2621445,bucket=actions=group:2621446
 group_id=2621446,type=indirect,bucket=actions=pop_vlan,output:6
 
group_id=2415919127,type=indirect,bucket=actions=set_field:00:00:00:00:02:27->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:8190->vlan_vid,group:268304386
 
group_id=1879048219,type=select,bucket=actions=group:2449473560,bucket=actions=group:2449473562
 
group_id=1879048224,type=select,bucket=actions=group:2449473565,bucket=actions=group:2449473567
 group_id=2621445,type=indirect,bucket=actions=pop_vlan,output:5
 
group_id=2415919129,type=indirect,bucket=actions=set_field:00:00:00:00:02:26->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:8190->vlan_vid,group:268304385
 group_id=1310724,type=indirect,bucket=actions=pop_vlan,output:4
 
group_id=2415919132,type=indirect,bucket=actions=set_field:00:00:00:00:02:27->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:8190->vlan_vid,group:268304386
 
group_id=2415919134,type=indirect,bucket=actions=set_field:00:00:00:00:02:26->eth_dst,set_field:00:00:00:00:02:04->eth_src,set_field:8190->vlan_vid,group:268304385
 group_id=1310723,type=indirect,bucket=actions=pop_vlan,output:3
 
group_id=2449473565,type=indirect,bucket=actions=pop_vlan,push_mpls:0x8847,set_field:205->mpls_label,group:2415919132,push_vlan:0x8100,set_field:8191->vlan_vid
 
group_id=2449473567,type=indirect,bucket=actions=pop_vlan,push_mpls:0x8847,set_field:205->mpls_label,group:2415919134,push_vlan:0x8100,set_field:8191->vlan_vid
 group_id=268304386,type=indirect,bucket=actions=pop_vlan,output:2
 
group_id=1075052544,type=all,bucket=actions=group:1310724,bucket=actions=group:1310723
 group_id=268304385,type=indirect,bucket=actions=pop_vlan,output:1
 
group_id=2449473562,type=indirect,bucket=actions=pop_vlan,push_mpls:0x8847,set_field:215->mpls_label,group:2415919129,push_vlan:0x8100,set_field:8191->vlan_vid


Here is my trace command and its output:
gongysh@ubuntu16:~/routing$ sudo ovs-appctl ofproto/trace s204 
in_port=3,dl_type=0x0800,dl_src=00:aa:00:00:00:01,nw_src=10.0.2.1,dl_dst=00:00:00:00:02:04,nw_dst=10.0.99.2,nw_proto=1
Bridge: s204
Flow: 
icmp,in_port=3,vlan_tci=0x,dl_src=00:aa:00:00:00:01,dl_dst=00:00:00:00:02:04,nw_src=10.0.2.1,nw_dst=10.0.99.2,nw_tos=0,nw_ecn=0,nw_ttl=0,icmp_type=0,icmp_code=0

Rule: table=0 cookie=0x85bd57cb49 priority=0
OpenFlow actions=goto_table:10

Resubmitted flow: 
icmp,in_port=3,vlan_tci=0x,dl_src=00:aa:00:00:00:01,dl_dst=00:00:00:00:02:04,nw_src=10.0.2.1,nw_dst=10.0.99.2,nw_tos=0,nw_ecn=0,nw_ttl=0,icmp_type=0,icmp_code=0
Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 reg5=0x0 
reg6=0x0 reg7=0x0
Resubmitted  odp: drop
Resubmitted megaflow: 
recirc_id=0,ip,in_port=3,vlan_tci=0x/0x1fff,nw_frag=no
Rule: table=10 cookie=0x7ad4173ff7 in_port=3,vlan_tci=0x/0x1fff
OpenFlow actions=push_vlan:0x8100,set_field:4116->vlan_vid,goto_table:20

Resubmitted flow: 
icmp,in_port=3,dl_vlan=20,dl_vlan_pcp=0,dl_src=00:aa:00:00:00:01,dl_dst=00:00:00:00:02:04,nw_src=10.0.2.1,nw_dst=10.0.99.2,nw_tos=0,nw_ecn=0,nw_ttl=0,icmp_type=0,icmp_code=0
Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 reg4=0x0 
reg5=0x0 reg6=0x0 reg7=0x0
Resubmitted  odp: drop
Resubmitted megaflow: 
recirc_id=0,ip,in_port=3,vlan_tci=0x,dl_dst=00:00:00:00:02:04,nw_frag=no
Rule: table=20 cookie=0x7a4b3d1965 
ip,in_port=3,dl_vlan=20,dl_dst=00:00:00:00:02:04
OpenFlow actions=goto_table:30

Resubmitted flow: unchanged
Resubmitted regs: reg0=0x0 reg1=0x0 reg2=0x0 reg3=0x0 
reg4=0x0 reg5=0x0 reg6=0x0 reg7=0x0
Resubmitted  odp: drop
Resubmitted megaflow: 
recirc_id=0,ip,in_port=3,vlan_tci=0x,dl_dst=00:00:00:00:02:04,nw_dst=10.0.99.0/24,nw_frag=no
Rule: table=30 cookie=0x7a2deb373b 
priority=48010,ip,nw_dst=10.0.99.0/24
OpenFlow