[ovs-dev] [PATCH v3] Add configurable OpenFlow port name.

2016-04-22 Thread Xiao Liang
Add new column "ofname" in Interface table to configure port name reported
to controllers with OpenFlow protocol, thus decouple OpenFlow port name from
device name.

For example:
# ovs-vsctl set Interface eth0 ofname=wan
# ovs-vsctl set Interface eth1 ofname=lan0
then controllers can recognize ports by their names.

Signed-off-by: Xiao Liang 
---
v2: Added test for ofname
Increased db schema version
Updated NEWS
v3: Rebase
---
 NEWS   |  1 +
 lib/db-ctl-base.h  |  2 +-
 ofproto/ofproto-provider.h |  1 +
 ofproto/ofproto.c  | 67 --
 ofproto/ofproto.h  |  9 ++-
 tests/ofproto.at   | 60 +
 utilities/ovs-vsctl.c  |  1 +
 vswitchd/bridge.c  | 10 +--
 vswitchd/vswitch.ovsschema |  6 +++--
 vswitchd/vswitch.xml   | 14 ++
 10 files changed, 163 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index ea7f3a1..156781c 100644
--- a/NEWS
+++ b/NEWS
@@ -15,6 +15,7 @@ Post-v2.5.0
now implemented.  Only flow mod and port mod messages are supported
in bundles.
  * New OpenFlow extension NXM_NX_MPLS_TTL to provide access to MPLS TTL.
+ * Port name can now be set with "ofname" column in the Interface table.
- ovs-ofctl:
  * queue-get-config command now allows a queue ID to be specified.
  * '--bundle' option can now be used with OpenFlow 1.3.
diff --git a/lib/db-ctl-base.h b/lib/db-ctl-base.h
index f8f576b..5bd62d5 100644
--- a/lib/db-ctl-base.h
+++ b/lib/db-ctl-base.h
@@ -177,7 +177,7 @@ struct weak_ref_table {
 struct cmd_show_table {
 const struct ovsdb_idl_table_class *table;
 const struct ovsdb_idl_column *name_column;
-const struct ovsdb_idl_column *columns[3]; /* Seems like a good number. */
+const struct ovsdb_idl_column *columns[4]; /* Seems like a good number. */
 const struct weak_ref_table wref_table;
 };
 
diff --git a/ofproto/ofproto-provider.h b/ofproto/ofproto-provider.h
index daa0077..8795242 100644
--- a/ofproto/ofproto-provider.h
+++ b/ofproto/ofproto-provider.h
@@ -84,6 +84,7 @@ struct ofproto {
 struct hmap ports;  /* Contains "struct ofport"s. */
 struct shash port_by_name;
 struct simap ofp_requests;  /* OpenFlow port number requests. */
+struct smap ofp_names;  /* OpenFlow port names. */
 uint16_t alloc_port_no; /* Last allocated OpenFlow port number. */
 uint16_t max_ports; /* Max possible OpenFlow port num, plus one. */
 struct hmap ofport_usage;   /* Map ofport to last used time. */
diff --git a/ofproto/ofproto.c b/ofproto/ofproto.c
index ff6affd..a2799f4 100644
--- a/ofproto/ofproto.c
+++ b/ofproto/ofproto.c
@@ -550,6 +550,7 @@ ofproto_create(const char *datapath_name, const char 
*datapath_type,
 hmap_init(>ofport_usage);
 shash_init(>port_by_name);
 simap_init(>ofp_requests);
+smap_init(>ofp_names);
 ofproto->max_ports = ofp_to_u16(OFPP_MAX);
 ofproto->eviction_group_timer = LLONG_MIN;
 ofproto->tables = NULL;
@@ -1546,6 +1547,7 @@ ofproto_destroy__(struct ofproto *ofproto)
 hmap_destroy(>ofport_usage);
 shash_destroy(>port_by_name);
 simap_destroy(>ofp_requests);
+smap_destroy(>ofp_names);
 
 OFPROTO_FOR_EACH_TABLE (table, ofproto) {
 oftable_destroy(table);
@@ -1945,7 +1947,7 @@ ofproto_port_open_type(const char *datapath_type, const 
char *port_type)
  * 'ofp_portp' is non-null). */
 int
 ofproto_port_add(struct ofproto *ofproto, struct netdev *netdev,
- ofp_port_t *ofp_portp)
+ ofp_port_t *ofp_portp, const char *ofp_name)
 {
 ofp_port_t ofp_port = ofp_portp ? *ofp_portp : OFPP_NONE;
 int error;
@@ -1956,6 +1958,9 @@ ofproto_port_add(struct ofproto *ofproto, struct netdev 
*netdev,
 
 simap_put(>ofp_requests, netdev_name,
   ofp_to_u16(ofp_port));
+if (ofp_name) {
+smap_replace(>ofp_names, netdev_name, ofp_name);
+}
 error = update_port(ofproto, netdev_name);
 }
 if (ofp_portp) {
@@ -2009,6 +2014,8 @@ ofproto_port_del(struct ofproto *ofproto, ofp_port_t 
ofp_port)
 simap_delete(>ofp_requests, ofp_request_node);
 }
 
+smap_remove(>ofp_names, name);
+
 error = ofproto->ofproto_class->port_del(ofproto, ofp_port);
 if (!error && ofport) {
 /* 'name' is the netdev's name and update_port() is going to close the
@@ -2285,6 +2292,7 @@ ofport_open(struct ofproto *ofproto,
 {
 enum netdev_flags flags;
 struct netdev *netdev;
+const char *ofp_name;
 int error;
 
 error = netdev_open(ofproto_port->name, ofproto_port->type, );
@@ -2307,7 +2315,13 @@ ofport_open(struct ofproto *ofproto,
 }
 pp->port_no = ofproto_port->ofp_port;
 netdev_get_etheraddr(netdev, >hw_addr);
-ovs_strlcpy(pp->name, ofproto_port->name, sizeof pp->name);
+
+ofp_name = 

Re: [ovs-dev] [PATCH v2] packets: use flow protocol when recalculating ipv6 checksums

2016-04-22 Thread Simon Horman
On Fri, Apr 22, 2016 at 10:10:38AM -0700, Ben Pfaff wrote:
> On Fri, Apr 22, 2016 at 10:22:56PM +1000, Simon Horman wrote:
> > When using masked actions the ipv6_proto field of an action
> > to set IPv6 fields may be zero rather than the prevailing protocol
> > which will result in skipping checksum recalculation.
> > 
> > This patch resolves the problem by relying on the protocol
> > in the packet rather than that in the set field action.
> > 
> > A similar fix for the kernel datapath has been accepted into David Miller's
> > 'net' tree as b4f70527f052 ("openvswitch: use flow protocol when
> > recalculating ipv6 checksums").
> > 
> > Cc: Jarno Rajahalme 
> > Fixes: 6d670e7f0d45 ("lib/odp: Masked set action execution and printing.")
> > Signed-off-by: Simon Horman 
> > ---
> > While preparing this I noticed that there does seem to be some scope
> > to consolidate packet_rh_present() and part of miniflow_extract().
> > 
> > v2
> > * Updated changelog to refer to protocol in packet, as this patch does,
> >   rather than the flow key, which the kernel datapath variant of the patch
> >   does. This was a copy-paste error in preparing the changelog.
> 
> GCC 4.9.1 gives me the following error:
> 
> ../lib/packets.c: In function 'packet_set_ipv6':
> ../lib/packets.c:951:9: error: 'proto' may be used uninitialized in this 
> function [-Werror=maybe-uninitialized]
>  packet_update_csum128(packet, proto, addr, new_addr);
>  ^
> ../lib/packets.c:1018:13: note: 'proto' was declared here
>  uint8_t proto;
>  ^
> 
> Other than that, this seems like a good bug fix, thank you!
> 
> Acked-by: Ben Pfaff 

Thanks.

* I have resolve the problem you highlighted by initialising proto to zero:
uint8_t proto = 0;
* I have pushed the updated patch to master with your Ack.
* I have also pushed a backport to branch-2.5 and branch-2.4 as
  I was able to observe the problem there.
* I also tested branch-2.3 but the problem does not appear to be present there.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] util.h: Restore stdarg.h which is necessary for va_list

2016-04-22 Thread Takashi YAMAMOTO
On Sat, Apr 23, 2016 at 12:28 PM, Ben Pfaff  wrote:

> On Sat, Apr 23, 2016 at 11:14:40AM +0900, YAMAMOTO Takashi wrote:
> > Fixes a regression in commit b44ff8d826535025f4f8d12808c4ef36a7a8 .
> > ("Misc cleanup with "util.h" header files")
> >
> > Signed-off-by: YAMAMOTO Takashi 
>
> Acked-by: Ben Pfaff 
>

thank you. pushed to master.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] util.h: Restore stdarg.h which is necessary for va_list

2016-04-22 Thread Ben Pfaff
On Sat, Apr 23, 2016 at 11:14:40AM +0900, YAMAMOTO Takashi wrote:
> Fixes a regression in commit b44ff8d826535025f4f8d12808c4ef36a7a8 .
> ("Misc cleanup with "util.h" header files")
> 
> Signed-off-by: YAMAMOTO Takashi 

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 4/4] classifier: Avoid inserting duplicates to cmaps.

2016-04-22 Thread Ben Pfaff
Thanks!

On Fri, Apr 22, 2016 at 07:46:09PM -0700, Jarno Rajahalme wrote:
> Thanks for a thorough review Ben! I just sent a v2 to the list.
> 
> I addressed all your concerns and even found a small bug when testing with 
> the new test-ccmap.
> 
>   Jarno
> 
> > On Apr 21, 2016, at 2:42 PM, Ben Pfaff  wrote:
> > 
> > On Wed, Apr 13, 2016 at 07:06:46PM -0700, Jarno Rajahalme wrote:
> >> Staged lookup indices assumed that cmap is efficient fealing with
> >> duplicates.  Duplicates are implemented as linked lists, however,
> >> causing removal of rules to become (O^2) and highly cache-inefficient
> >> with large number of duplicates.
> >> 
> >> This was problematic especially when many rules shared the same match
> >> in packet metadata (e.g., a port number, but nothing else).
> >> 
> >> This patch fixes the problem by introducing a new 'count' variant of
> >> the cmap (ccmap), which can be efficiently used to keep counts of
> >> inserted hash values provided by the caller.  This does not require a
> >> node in the user data structure, so this makes the classifier
> >> implementation a bit more memory efficient, too.
> >> 
> >> Reported-by: Alok Kumar Maurya 
> >> Signed-off-by: Jarno Rajahalme 
> > 
> > At first I was unhappy that we needed a new data structure, then I
> > discovered that I like the new data structure.  Thanks for doing this.
> > 
> > This is a lot of new code to write without adding a test!  Can you adapt
> > test-cmap.c, or write something else, to test ccmap?
> > 
> > My compilers do not like this.  Clang 3.5.0:
> > 
> >../lib/ccmap.c:193:9: error: address argument to atomic operation must 
> > be a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> > _Atomic(uint64_t) *') invalid)
> >../lib/ovs-atomic.h:393:5: note: expanded from macro 
> > 'atomic_read_relaxed'
> >../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> > 'atomic_read_explicit'
> >../lib/ccmap.c:245:9: error: address argument to atomic operation must 
> > be a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> > _Atomic(uint64_t) *') invalid)
> >../lib/ovs-atomic.h:393:5: note: expanded from macro 
> > 'atomic_read_relaxed'
> >../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> > 'atomic_read_explicit'
> >../lib/ccmap.c:544:13: error: address argument to atomic operation must 
> > be a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> > _Atomic(uint64_t) *') invalid)
> >../lib/ovs-atomic.h:393:5: note: expanded from macro 
> > 'atomic_read_relaxed'
> >../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> > 'atomic_read_explicit'
> > 
> > Sparse:
> > 
> >../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:193:9:expected void *
> >../lib/ccmap.c:193:9:got unsigned long long const *
> >../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:193:9:expected void *
> >../lib/ccmap.c:193:9:got unsigned long long const *
> >../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:193:9:expected void *
> >../lib/ccmap.c:193:9:got unsigned long long const *
> >../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:193:9:expected void *
> >../lib/ccmap.c:193:9:got unsigned long long const *
> >../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:245:9:expected void *
> >../lib/ccmap.c:245:9:got unsigned long long const *
> >../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:245:9:expected void *
> >../lib/ccmap.c:245:9:got unsigned long long const *
> >../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:544:13:expected void *
> >../lib/ccmap.c:544:13:got unsigned long long const *
> >../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
> > modifiers)
> >../lib/ccmap.c:544:13:expected void *
> >../lib/ccmap.c:544:13:got unsigned long long const *
> > 
> > These comments and macros are the only mention of "entries", everything
> > else talks about "nodes", perhaps the names here should be updated.
> > Also, CCMAP_PADDING is never used and it is always 0 despite the
> > comment:
> > 
> >/* An entry is an 32-bit hash and a 32-bit count. */
> >#define CCMAP_ENTRY_SIZE (4 + 4)
> > 
> >/* Number of entries per bucket: 8 */
> >#define CCMAP_K (CACHE_LINE_SIZE / CCMAP_ENTRY_SIZE)
> > 
> >/* Pad to make a bucket a full cache line in size: 4 on 32-bit, 0 on 
> > 64-bit. */
> >#define CCMAP_PADDING 

[ovs-dev] [PATCH v3 5/5] classifier: Use ccmaps for staged lookup indices.

2016-04-22 Thread Jarno Rajahalme
Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya 
Signed-off-by: Jarno Rajahalme 
---
 lib/classifier-private.h |  8 
 lib/classifier.c | 44 
 2 files changed, 16 insertions(+), 36 deletions(-)

diff --git a/lib/classifier-private.h b/lib/classifier-private.h
index babb64f..eb47a4c 100644
--- a/lib/classifier-private.h
+++ b/lib/classifier-private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #ifndef CLASSIFIER_PRIVATE_H
 #define CLASSIFIER_PRIVATE_H 1
 
+#include "ccmap.h"
 #include "cmap.h"
 #include "flow.h"
 #include "hash.h"
@@ -44,7 +45,7 @@ struct cls_subtable {
 unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'
  * (runtime configurable). */
 const int ports_mask_len;
-struct cmap indices[CLS_MAX_INDICES];   /* Staged lookup indices. */
+struct ccmap indices[CLS_MAX_INDICES];  /* Staged lookup indices. */
 rcu_trie_ptr ports_trie;/* NULL if none. */
 
 /* These fields are accessed by all readers. */
@@ -67,8 +68,7 @@ struct cls_match {
 
 /* Accessed by readers interested in wildcarding. */
 const int priority; /* Larger numbers are higher priorities. */
-struct cmap_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's
-* 'indices'. */
+
 /* Accessed by all readers. */
 struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */
 
diff --git a/lib/classifier.c b/lib/classifier.c
index 1557f6a..2b24724 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -499,21 +499,6 @@ static inline ovs_be32 minimatch_get_ports(const struct 
minimatch *match)
 & MINIFLOW_GET_BE32(>mask->masks, tp_src);
 }
 
-static void
-subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
-   struct cls_subtable *subtable,
-   struct cls_match *head, struct cls_match *new,
-   uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
-{
-/* Rule's data is already in the tries. */
-
-for (int i = 0; i < subtable->n_indices; i++) {
-cmap_replace(>indices[i], >index_nodes[i],
- >index_nodes[i], ihash[i]);
-}
-cmap_replace(>rules, >cmap_node, >cmap_node, hash);
-}
-
 /* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
  * the caller must not modify or free it.
  *
@@ -587,15 +572,9 @@ classifier_replace(struct classifier *cls, const struct 
cls_rule *rule,
subtable->ports_mask_len);
 }
 
-/* Add new node to segment indices.
- *
- * Readers may find the rule in the indices before the rule is visible
- * in the subtables 'rules' map.  This may result in us losing the
- * opportunity to quit lookups earlier, resulting in sub-optimal
- * wildcarding.  This will be fixed later by revalidation (always
- * scheduled after flow table changes). */
+/* Add new node to segment indices. */
 for (i = 0; i < subtable->n_indices; i++) {
-cmap_insert(>indices[i], >index_nodes[i], ihash[i]);
+ccmap_inc(>indices[i], ihash[i]);
 }
 n_rules = cmap_insert(>rules, >cmap_node, hash);
 } else {   /* Equal rules exist in the classifier already. */
@@ -628,8 +607,8 @@ classifier_replace(struct classifier *cls, const struct 
cls_rule *rule,
 /* Replace the existing head in data structures, if rule is the new
  * head. */
 if (iter == head) {
-subtable_replace_head_rule(cls, subtable, head, new, hash,
-   ihash);
+cmap_replace(>rules, >cmap_node,
+ >cmap_node, hash);
 }
 
 if (old) {
@@ -780,7 +759,8 @@ classifier_remove(struct classifier 

Re: [ovs-dev] [PATCH 4/4] classifier: Avoid inserting duplicates to cmaps.

2016-04-22 Thread Jarno Rajahalme
Scrap the v2, I sent a v3 which avoids a portability problem I introduced in v2.

  Jarno

> On Apr 22, 2016, at 7:46 PM, Jarno Rajahalme  wrote:
> 
> Thanks for a thorough review Ben! I just sent a v2 to the list.
> 
> I addressed all your concerns and even found a small bug when testing with 
> the new test-ccmap.
> 
>   Jarno
> 
>> On Apr 21, 2016, at 2:42 PM, Ben Pfaff > 
>> wrote:
>> 
>> On Wed, Apr 13, 2016 at 07:06:46PM -0700, Jarno Rajahalme wrote:
>>> Staged lookup indices assumed that cmap is efficient fealing with
>>> duplicates.  Duplicates are implemented as linked lists, however,
>>> causing removal of rules to become (O^2) and highly cache-inefficient
>>> with large number of duplicates.
>>> 
>>> This was problematic especially when many rules shared the same match
>>> in packet metadata (e.g., a port number, but nothing else).
>>> 
>>> This patch fixes the problem by introducing a new 'count' variant of
>>> the cmap (ccmap), which can be efficiently used to keep counts of
>>> inserted hash values provided by the caller.  This does not require a
>>> node in the user data structure, so this makes the classifier
>>> implementation a bit more memory efficient, too.
>>> 
>>> Reported-by: Alok Kumar Maurya >> >
>>> Signed-off-by: Jarno Rajahalme >
>> 
>> At first I was unhappy that we needed a new data structure, then I
>> discovered that I like the new data structure.  Thanks for doing this.
>> 
>> This is a lot of new code to write without adding a test!  Can you adapt
>> test-cmap.c, or write something else, to test ccmap?
>> 
>> My compilers do not like this.  Clang 3.5.0:
>> 
>>../lib/ccmap.c:193:9: error: address argument to atomic operation must be 
>> a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
>> _Atomic(uint64_t) *') invalid)
>>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
>> 'atomic_read_explicit'
>>../lib/ccmap.c:245:9: error: address argument to atomic operation must be 
>> a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
>> _Atomic(uint64_t) *') invalid)
>>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
>> 'atomic_read_explicit'
>>../lib/ccmap.c:544:13: error: address argument to atomic operation must 
>> be a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
>> _Atomic(uint64_t) *') invalid)
>>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
>> 'atomic_read_explicit'
>> 
>> Sparse:
>> 
>>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:193:9:expected void *
>>../lib/ccmap.c:193:9:got unsigned long long const *
>>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:193:9:expected void *
>>../lib/ccmap.c:193:9:got unsigned long long const *
>>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:193:9:expected void *
>>../lib/ccmap.c:193:9:got unsigned long long const *
>>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:193:9:expected void *
>>../lib/ccmap.c:193:9:got unsigned long long const *
>>../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:245:9:expected void *
>>../lib/ccmap.c:245:9:got unsigned long long const *
>>../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:245:9:expected void *
>>../lib/ccmap.c:245:9:got unsigned long long const *
>>../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:544:13:expected void *
>>../lib/ccmap.c:544:13:got unsigned long long const *
>>../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
>> modifiers)
>>../lib/ccmap.c:544:13:expected void *
>>../lib/ccmap.c:544:13:got unsigned long long const *
>> 
>> These comments and macros are the only mention of "entries", everything
>> else talks about "nodes", perhaps the names here should be updated.
>> Also, CCMAP_PADDING is never used and it is always 0 despite the
>> comment:
>> 
>>/* An entry is an 32-bit hash and a 32-bit count. */
>>#define CCMAP_ENTRY_SIZE (4 + 4)
>> 
>>/* Number of entries per bucket: 8 */
>>#define CCMAP_K (CACHE_LINE_SIZE / CCMAP_ENTRY_SIZE)
>> 
>>/* Pad to make a bucket a full cache line in size: 4 on 

[ovs-dev] [PATCH v3 4/5] lib: Add new 'counting cmap' type.

2016-04-22 Thread Jarno Rajahalme
cmap implements duplicates as linked lists, which causes removal of
rules to become (O^2) with large number of duplicates.  This patch
fixes the problem by introducing a new 'counting' variant of the cmap
(ccmap), which can be efficiently used to keep counts of inserted hash
values provided by the caller.  This does not require a node in the
user data structure, so this makes the user implementation a bit more
memory efficient, too.

Signed-off-by: Jarno Rajahalme 
---
 lib/automake.mk|   2 +
 lib/ccmap.c| 581 +
 lib/ccmap.h|  64 ++
 tests/automake.mk  |   2 +
 tests/library.at   |   6 +
 tests/test-ccmap.c | 292 +++
 6 files changed, 947 insertions(+)
 create mode 100644 lib/ccmap.c
 create mode 100644 lib/ccmap.h
 create mode 100644 tests/test-ccmap.c

diff --git a/lib/automake.mk b/lib/automake.mk
index 1ec2115..c54c8f8 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -38,6 +38,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/classifier.c \
lib/classifier.h \
lib/classifier-private.h \
+   lib/ccmap.c \
+   lib/ccmap.h \
lib/cmap.c \
lib/cmap.h \
lib/colors.c \
diff --git a/lib/ccmap.c b/lib/ccmap.c
new file mode 100644
index 000..08359b5
--- /dev/null
+++ b/lib/ccmap.c
@@ -0,0 +1,581 @@
+/*
+ * Copyright (c) 2014, 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include "ccmap.h"
+#include "coverage.h"
+#include "bitmap.h"
+#include "hash.h"
+#include "ovs-rcu.h"
+#include "random.h"
+#include "util.h"
+
+COVERAGE_DEFINE(ccmap_expand);
+COVERAGE_DEFINE(ccmap_shrink);
+
+/* A count-only version of the cmap. */
+
+/* Allow protected access to the value without atomic semantics.  This makes
+ * the exclusive writer somewhat faster. */
+typedef union {
+unsigned long long protected_value;
+ATOMIC(unsigned long long) atomic_value;
+} ccmap_node_t;
+BUILD_ASSERT_DECL(sizeof(ccmap_node_t) == sizeof(uint64_t));
+
+static uint64_t
+ccmap_node_get(const ccmap_node_t *node)
+{
+uint64_t value;
+
+atomic_read_relaxed(_CAST(ccmap_node_t *, node)->atomic_value,
+);
+
+return value;
+}
+
+/* It is safe to allow compiler optimize reads by the exclusive writer. */
+static uint64_t
+ccmap_node_get_protected(const ccmap_node_t *node)
+{
+return node->protected_value;
+}
+
+static void
+ccmap_node_set_protected(ccmap_node_t *node, uint64_t value)
+{
+atomic_store_relaxed(>atomic_value, value);
+}
+
+static uint64_t
+ccmap_node(uint32_t count, uint32_t hash)
+{
+return (uint64_t)count << 32 | hash;
+}
+
+static uint32_t
+ccmap_node_hash(uint64_t node)
+{
+return node;
+}
+
+static uint32_t
+ccmap_node_count(uint64_t node)
+{
+return node >> 32;
+}
+
+/* Number of nodes per bucket. */
+#define CCMAP_K (CACHE_LINE_SIZE / sizeof(ccmap_node_t))
+
+/* A cuckoo hash bucket.  Designed to be cache-aligned and exactly one cache
+ * line long. */
+struct ccmap_bucket {
+/* Each node incudes both the hash (low 32-bits) and the count (high
+ * 32-bits), allowing readers always getting a consistent pair. */
+ccmap_node_t nodes[CCMAP_K];
+};
+BUILD_ASSERT_DECL(sizeof(struct ccmap_bucket) == CACHE_LINE_SIZE);
+
+/* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
+ * enlarging a ccmap.  Reasonable values lie between about 75% and 93%.  
Smaller
+ * values waste memory; larger values increase the average insertion time. */
+#define CCMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))
+
+/* Default minimum load factor (as a fraction of UINT32_MAX + 1) before
+ * shrinking a ccmap.  Currently, the value is chosen to be 20%, this
+ * means ccmap will have a 40% load factor after shrink. */
+#define CCMAP_MIN_LOAD ((uint32_t) (UINT32_MAX * .20))
+
+/* The implementation of a concurrent hash map. */
+struct ccmap_impl {
+unsigned int n_unique;  /* Number of in-use nodes. */
+unsigned int n; /* Number of hashes inserted. */
+unsigned int max_n; /* Max nodes before enlarging. */
+unsigned int min_n; /* Min nodes before shrinking. */
+uint32_t mask;  /* Number of 'buckets', minus one. */
+uint32_t basis; /* Basis for rehashing client's hash values. */
+
+/* Padding to make ccmap_impl exactly one cache line long. */
+uint8_t pad[CACHE_LINE_SIZE 

[ovs-dev] [PATCH v3 1/5] classifier: Remove redundant index.

2016-04-22 Thread Jarno Rajahalme
The test for figuring out if the last index had the same fields as the
actual rules map as broken, resulting into keeping an unnecessary
index around.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 19e26b4..52723ca 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1499,11 +1499,11 @@ insert_subtable(struct classifier *cls, const struct 
minimask *mask)
 /* Map for the final stage. */
 *CONST_CAST(struct flowmap *, >index_maps[index])
 = miniflow_get_map_in_range(>masks, prev, FLOW_U64S);
-/* Check if the final stage adds any bits,
- * and remove the last index if it doesn't. */
+/* Check if the final stage adds any bits. */
 if (index > 0) {
-if (flowmap_equal(subtable->index_maps[index],
-  subtable->index_maps[index - 1])) {
+if (flowmap_is_empty(subtable->index_maps[index])) {
+/* Remove the last index, as it has the same fields as the rules
+ * map. */
 --index;
 cmap_destroy(>indices[index]);
 }
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v3 2/5] classifier: Remove logging.

2016-04-22 Thread Jarno Rajahalme
The only vlog line was a left over from debugging.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 52723ca..a62a2bd 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -25,9 +25,6 @@
 #include "openvswitch/ofp-util.h"
 #include "packets.h"
 #include "util.h"
-#include "openvswitch/vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(classifier);
 
 struct trie_ctx;
 
@@ -2261,7 +2258,6 @@ trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 
*prefix, int mlen)
 }
 /* Cannot go deeper. This should never happen, since only rules
  * that actually exist in the classifier are ever removed. */
-VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
 }
 
 
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v3 3/5] classifier: Remove rare optimization case.

2016-04-22 Thread Jarno Rajahalme
This optimization applied when a staged lookup index would narrow down
to a single rule, which happens sometimes is simple test cases, but
presumably less often in more populated flow tables.  The result of
this optimization allowed a bit more general megaflows, but the bit
patterns produced were sometimes cryptic.  Finally, a later fix to a
more important performance problem does not allow for this
optimization any more, so remove it now.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c  | 75 +--
 tests/classifier.at   | 10 +++
 tests/ofproto-dpif.at | 14 +-
 3 files changed, 13 insertions(+), 86 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index a62a2bd..1557f6a 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1658,54 +1658,6 @@ find_match(const struct cls_subtable *subtable, 
cls_version_t version,
 return NULL;
 }
 
-/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
- * for which 'flow', for which 'mask' has a bit set, specifies a particular
- * value has the correct value in 'target'.
- *
- * This function is equivalent to miniflow_and_mask_matches_flow() but this
- * version fills in the mask bits in 'wc'. */
-static inline bool
-miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
-  const struct minimask *mask,
-  const struct flow *target,
-  struct flow_wildcards *wc)
-{
-const uint64_t *flowp = miniflow_get_values(flow);
-const uint64_t *maskp = miniflow_get_values(>masks);
-const uint64_t *target_u64 = (const uint64_t *)target;
-uint64_t *wc_u64 = (uint64_t *)>masks;
-uint64_t diff;
-size_t idx;
-map_t map;
-
-FLOWMAP_FOR_EACH_MAP (map, mask->masks.map) {
-MAP_FOR_EACH_INDEX(idx, map) {
-uint64_t msk = *maskp++;
-
-diff = (*flowp++ ^ target_u64[idx]) & msk;
-if (diff) {
-goto out;
-}
-
-/* Fill in the bits that were looked at. */
-wc_u64[idx] |= msk;
-}
-target_u64 += MAP_T_BITS;
-wc_u64 += MAP_T_BITS;
-}
-return true;
-
-out:
-/* Only unwildcard if none of the differing bits is already
- * exact-matched. */
-if (!(wc_u64[idx] & diff)) {
-/* Keep one bit of the difference.  The selected bit may be
- * different in big-endian v.s. little-endian systems. */
-wc_u64[idx] |= rightmost_1bit(diff);
-}
-return false;
-}
-
 static const struct cls_match *
 find_match_wc(const struct cls_subtable *subtable, cls_version_t version,
   const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
@@ -1724,8 +1676,6 @@ find_match_wc(const struct cls_subtable *subtable, 
cls_version_t version,
 
 /* Try to finish early by checking fields in segments. */
 for (i = 0; i < subtable->n_indices; i++) {
-const struct cmap_node *inode;
-
 if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
 subtable->index_maps[i], flow, wc)) {
 /* 'wc' bits for the trie field set, now unwildcard the preceding
@@ -1740,32 +1690,9 @@ find_match_wc(const struct cls_subtable *subtable, 
cls_version_t version,
subtable->index_maps[i],
_offset, );
 
-inode = cmap_find(>indices[i], hash);
-if (!inode) {
+if (!cmap_find(>indices[i], hash)) {
 goto no_match;
 }
-
-/* If we have narrowed down to a single rule already, check whether
- * that rule matches.  Either way, we're done.
- *
- * (Rare) hash collisions may cause us to miss the opportunity for this
- * optimization. */
-if (!cmap_node_next(inode)) {
-const struct cls_match *head;
-
-ASSIGN_CONTAINER(head, inode - i, index_nodes);
-if (miniflow_and_mask_matches_flow_wc(>flow, >mask,
-  flow, wc)) {
-/* Return highest priority rule that is visible. */
-CLS_MATCH_FOR_EACH (rule, head) {
-if (OVS_LIKELY(cls_match_visible_in_version(rule,
-version))) {
-return rule;
-}
-}
-}
-return NULL;
-}
 }
 /* Trie check for the final range. */
 if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
diff --git a/tests/classifier.at b/tests/classifier.at
index b110508..e56ba3a 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -49,7 +49,7 @@ Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 

Re: [ovs-dev] [PATCH 4/4] classifier: Avoid inserting duplicates to cmaps.

2016-04-22 Thread Jarno Rajahalme
Thanks for a thorough review Ben! I just sent a v2 to the list.

I addressed all your concerns and even found a small bug when testing with the 
new test-ccmap.

  Jarno

> On Apr 21, 2016, at 2:42 PM, Ben Pfaff  wrote:
> 
> On Wed, Apr 13, 2016 at 07:06:46PM -0700, Jarno Rajahalme wrote:
>> Staged lookup indices assumed that cmap is efficient fealing with
>> duplicates.  Duplicates are implemented as linked lists, however,
>> causing removal of rules to become (O^2) and highly cache-inefficient
>> with large number of duplicates.
>> 
>> This was problematic especially when many rules shared the same match
>> in packet metadata (e.g., a port number, but nothing else).
>> 
>> This patch fixes the problem by introducing a new 'count' variant of
>> the cmap (ccmap), which can be efficiently used to keep counts of
>> inserted hash values provided by the caller.  This does not require a
>> node in the user data structure, so this makes the classifier
>> implementation a bit more memory efficient, too.
>> 
>> Reported-by: Alok Kumar Maurya 
>> Signed-off-by: Jarno Rajahalme 
> 
> At first I was unhappy that we needed a new data structure, then I
> discovered that I like the new data structure.  Thanks for doing this.
> 
> This is a lot of new code to write without adding a test!  Can you adapt
> test-cmap.c, or write something else, to test ccmap?
> 
> My compilers do not like this.  Clang 3.5.0:
> 
>../lib/ccmap.c:193:9: error: address argument to atomic operation must be 
> a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> _Atomic(uint64_t) *') invalid)
>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> 'atomic_read_explicit'
>../lib/ccmap.c:245:9: error: address argument to atomic operation must be 
> a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> _Atomic(uint64_t) *') invalid)
>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> 'atomic_read_explicit'
>../lib/ccmap.c:544:13: error: address argument to atomic operation must be 
> a pointer to non-const _Atomic type ('const ccmap_node_t *' (aka 'const 
> _Atomic(uint64_t) *') invalid)
>../lib/ovs-atomic.h:393:5: note: expanded from macro 'atomic_read_relaxed'
>../lib/ovs-atomic-clang.h:53:15: note: expanded from macro 
> 'atomic_read_explicit'
> 
> Sparse:
> 
>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:193:9:expected void *
>../lib/ccmap.c:193:9:got unsigned long long const *
>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:193:9:expected void *
>../lib/ccmap.c:193:9:got unsigned long long const *
>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:193:9:expected void *
>../lib/ccmap.c:193:9:got unsigned long long const *
>../lib/ccmap.c:193:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:193:9:expected void *
>../lib/ccmap.c:193:9:got unsigned long long const *
>../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:245:9:expected void *
>../lib/ccmap.c:245:9:got unsigned long long const *
>../lib/ccmap.c:245:9: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:245:9:expected void *
>../lib/ccmap.c:245:9:got unsigned long long const *
>../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:544:13:expected void *
>../lib/ccmap.c:544:13:got unsigned long long const *
>../lib/ccmap.c:544:13: warning: incorrect type in argument 1 (different 
> modifiers)
>../lib/ccmap.c:544:13:expected void *
>../lib/ccmap.c:544:13:got unsigned long long const *
> 
> These comments and macros are the only mention of "entries", everything
> else talks about "nodes", perhaps the names here should be updated.
> Also, CCMAP_PADDING is never used and it is always 0 despite the
> comment:
> 
>/* An entry is an 32-bit hash and a 32-bit count. */
>#define CCMAP_ENTRY_SIZE (4 + 4)
> 
>/* Number of entries per bucket: 8 */
>#define CCMAP_K (CACHE_LINE_SIZE / CCMAP_ENTRY_SIZE)
> 
>/* Pad to make a bucket a full cache line in size: 4 on 32-bit, 0 on 
> 64-bit. */
>#define CCMAP_PADDING ((CACHE_LINE_SIZE - 4) - (CCMAP_K * 
> CCMAP_ENTRY_SIZE))
> 
> There's a lot of explicit "inline" in this file, presumably left over
> from cmap.c.  You might be able to drop some of it.
> 
> This comment on other_hash() talks about another comment that does not
> exist:
> 
> /* Given a rehashed value 

[ovs-dev] [PATCH v2 4/6] ovs-atomic-clang: Add missing lock free macros.

2016-04-22 Thread Jarno Rajahalme
ATOMIC_type_LOCK_FREE macros were missing.

Signed-off-by: Jarno Rajahalme 
---
 lib/ovs-atomic-clang.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/lib/ovs-atomic-clang.h b/lib/ovs-atomic-clang.h
index 34cc2fa..54c0922 100644
--- a/lib/ovs-atomic-clang.h
+++ b/lib/ovs-atomic-clang.h
@@ -21,6 +21,16 @@
 
 #define OVS_ATOMIC_CLANG_IMPL 1
 
+#define IS_LOCKLESS_ATOMIC(OBJECT) __c11_atomic_is_lock_free(sizeof(OBJECT))
+
+#define ATOMIC_BOOL_LOCK_FREE IS_LOCKLESS_ATOMIC(bool)
+#define ATOMIC_CHAR_LOCK_FREE IS_LOCKLESS_ATOMIC(char)
+#define ATOMIC_SHORT_LOCK_FREE IS_LOCKLESS_ATOMIC(short)
+#define ATOMIC_INT_LOCK_FREE IS_LOCKLESS_ATOMIC(int)
+#define ATOMIC_LONG_LOCK_FREE IS_LOCKLESS_ATOMIC(long)
+#define ATOMIC_LLONG_LOCK_FREE IS_LOCKLESS_ATOMIC(long long)
+#define ATOMIC_POINTER_LOCK_FREE IS_LOCKLESS_ATOMIC(void *)
+
 #define ATOMIC(TYPE) _Atomic(TYPE)
 
 #define ATOMIC_VAR_INIT(VALUE) (VALUE)
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 2/6] classifier: Remove logging.

2016-04-22 Thread Jarno Rajahalme
The only vlog line was a left over from debugging.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 52723ca..a62a2bd 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -25,9 +25,6 @@
 #include "openvswitch/ofp-util.h"
 #include "packets.h"
 #include "util.h"
-#include "openvswitch/vlog.h"
-
-VLOG_DEFINE_THIS_MODULE(classifier);
 
 struct trie_ctx;
 
@@ -2261,7 +2258,6 @@ trie_remove_prefix(rcu_trie_ptr *root, const ovs_be32 
*prefix, int mlen)
 }
 /* Cannot go deeper. This should never happen, since only rules
  * that actually exist in the classifier are ever removed. */
-VLOG_WARN("Trying to remove non-existing rule from a prefix trie.");
 }
 
 
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2 6/6] classifier: Use ccmaps for staged lookup indices.

2016-04-22 Thread Jarno Rajahalme
Use the new ccmap type instead of cmap for staged lookup indices to
fix the problem with slow removal of rules with large number of
duplicates.  This was problematic especially when many rules shared
the same match in packet metadata (e.g., a port number, but nothing
else), causing a large number of duplicates to be inserted into the
staged lookup index.  ccmap only keeps the count of inserted (hash)
values, so duplicates do not add any performance penalty.

Reported-by: Alok Kumar Maurya 
Signed-off-by: Jarno Rajahalme 
---
 lib/classifier-private.h |  8 
 lib/classifier.c | 44 
 2 files changed, 16 insertions(+), 36 deletions(-)

diff --git a/lib/classifier-private.h b/lib/classifier-private.h
index babb64f..eb47a4c 100644
--- a/lib/classifier-private.h
+++ b/lib/classifier-private.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2014, 2015, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -17,6 +17,7 @@
 #ifndef CLASSIFIER_PRIVATE_H
 #define CLASSIFIER_PRIVATE_H 1
 
+#include "ccmap.h"
 #include "cmap.h"
 #include "flow.h"
 #include "hash.h"
@@ -44,7 +45,7 @@ struct cls_subtable {
 unsigned int trie_plen[CLS_MAX_TRIES];  /* Trie prefix length in 'mask'
  * (runtime configurable). */
 const int ports_mask_len;
-struct cmap indices[CLS_MAX_INDICES];   /* Staged lookup indices. */
+struct ccmap indices[CLS_MAX_INDICES];  /* Staged lookup indices. */
 rcu_trie_ptr ports_trie;/* NULL if none. */
 
 /* These fields are accessed by all readers. */
@@ -67,8 +68,7 @@ struct cls_match {
 
 /* Accessed by readers interested in wildcarding. */
 const int priority; /* Larger numbers are higher priorities. */
-struct cmap_node index_nodes[CLS_MAX_INDICES]; /* Within subtable's
-* 'indices'. */
+
 /* Accessed by all readers. */
 struct cmap_node cmap_node; /* Within struct cls_subtable 'rules'. */
 
diff --git a/lib/classifier.c b/lib/classifier.c
index 1557f6a..2b24724 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015 Nicira, Inc.
+ * Copyright (c) 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -499,21 +499,6 @@ static inline ovs_be32 minimatch_get_ports(const struct 
minimatch *match)
 & MINIFLOW_GET_BE32(>mask->masks, tp_src);
 }
 
-static void
-subtable_replace_head_rule(struct classifier *cls OVS_UNUSED,
-   struct cls_subtable *subtable,
-   struct cls_match *head, struct cls_match *new,
-   uint32_t hash, uint32_t ihash[CLS_MAX_INDICES])
-{
-/* Rule's data is already in the tries. */
-
-for (int i = 0; i < subtable->n_indices; i++) {
-cmap_replace(>indices[i], >index_nodes[i],
- >index_nodes[i], ihash[i]);
-}
-cmap_replace(>rules, >cmap_node, >cmap_node, hash);
-}
-
 /* Inserts 'rule' into 'cls' in 'version'.  Until 'rule' is removed from 'cls',
  * the caller must not modify or free it.
  *
@@ -587,15 +572,9 @@ classifier_replace(struct classifier *cls, const struct 
cls_rule *rule,
subtable->ports_mask_len);
 }
 
-/* Add new node to segment indices.
- *
- * Readers may find the rule in the indices before the rule is visible
- * in the subtables 'rules' map.  This may result in us losing the
- * opportunity to quit lookups earlier, resulting in sub-optimal
- * wildcarding.  This will be fixed later by revalidation (always
- * scheduled after flow table changes). */
+/* Add new node to segment indices. */
 for (i = 0; i < subtable->n_indices; i++) {
-cmap_insert(>indices[i], >index_nodes[i], ihash[i]);
+ccmap_inc(>indices[i], ihash[i]);
 }
 n_rules = cmap_insert(>rules, >cmap_node, hash);
 } else {   /* Equal rules exist in the classifier already. */
@@ -628,8 +607,8 @@ classifier_replace(struct classifier *cls, const struct 
cls_rule *rule,
 /* Replace the existing head in data structures, if rule is the new
  * head. */
 if (iter == head) {
-subtable_replace_head_rule(cls, subtable, head, new, hash,
-   ihash);
+cmap_replace(>rules, >cmap_node,
+ >cmap_node, hash);
 }
 
 if (old) {
@@ -780,7 +759,8 @@ classifier_remove(struct classifier 

[ovs-dev] [PATCH v2 5/6] lib: Add new 'counting cmap' type.

2016-04-22 Thread Jarno Rajahalme
cmap implements duplicates as linked lists, which causes removal of
rules to become (O^2) with large number of duplicates.  This patch
fixes the problem by introducing a new 'counting' variant of the cmap
(ccmap), which can be efficiently used to keep counts of inserted hash
values provided by the caller.  This does not require a node in the
user data structure, so this makes the user implementation a bit more
memory efficient, too.

Signed-off-by: Jarno Rajahalme 
---
 lib/automake.mk|   2 +
 lib/ccmap.c| 591 +
 lib/ccmap.h|  64 ++
 tests/automake.mk  |   2 +
 tests/library.at   |   6 +
 tests/test-ccmap.c | 292 ++
 6 files changed, 957 insertions(+)
 create mode 100644 lib/ccmap.c
 create mode 100644 lib/ccmap.h
 create mode 100644 tests/test-ccmap.c

diff --git a/lib/automake.mk b/lib/automake.mk
index 1ec2115..c54c8f8 100644
--- a/lib/automake.mk
+++ b/lib/automake.mk
@@ -38,6 +38,8 @@ lib_libopenvswitch_la_SOURCES = \
lib/classifier.c \
lib/classifier.h \
lib/classifier-private.h \
+   lib/ccmap.c \
+   lib/ccmap.h \
lib/cmap.c \
lib/cmap.h \
lib/colors.c \
diff --git a/lib/ccmap.c b/lib/ccmap.c
new file mode 100644
index 000..959e3c8
--- /dev/null
+++ b/lib/ccmap.c
@@ -0,0 +1,591 @@
+/*
+ * Copyright (c) 2014, 2016 Nicira, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include "ccmap.h"
+#include "coverage.h"
+#include "bitmap.h"
+#include "hash.h"
+#include "ovs-rcu.h"
+#include "random.h"
+#include "util.h"
+
+COVERAGE_DEFINE(ccmap_expand);
+COVERAGE_DEFINE(ccmap_shrink);
+
+/* A count-only version of the cmap. */
+
+/* Allow protected access to the value without atomic semantics.  This makes
+ * the exclusive writer somewhat faster. */
+typedef union {
+unsigned long long protected_value;
+ATOMIC(unsigned long long) atomic_value;
+} ccmap_node_t;
+BUILD_ASSERT_DECL(sizeof(ccmap_node_t) == sizeof(uint64_t));
+
+static uint64_t
+ccmap_node_get(const ccmap_node_t *node)
+{
+uint64_t value;
+
+atomic_read_relaxed(_CAST(ccmap_node_t *, node)->atomic_value,
+);
+
+return value;
+}
+
+/* The lock protecting the critical section has acquire/release semantics.  The
+ * writes in the critical section are guaranteed to happen before the same
+ * location is are read within the same critical section, or latest before the
+ * lock is released, and since there is no tight syncronization between the
+ * reader and the writer, the new values need not be visible to readers before
+ * the lock is released. */
+static uint64_t
+ccmap_node_get_protected(const ccmap_node_t *node)
+{
+return node->protected_value;
+}
+
+static void
+ccmap_node_set_protected(ccmap_node_t *node, uint64_t value)
+{
+/* We align nodes properly for them to be lock free if possible. */
+if (ATOMIC_LLONG_LOCK_FREE) {
+node->protected_value = value;
+} else {
+atomic_store_relaxed(>atomic_value, value);
+}
+}
+
+static uint64_t
+ccmap_node(uint32_t count, uint32_t hash)
+{
+return (uint64_t)count << 32 | hash;
+}
+
+static uint32_t
+ccmap_node_hash(uint64_t node)
+{
+return node;
+}
+
+static uint32_t
+ccmap_node_count(uint64_t node)
+{
+return node >> 32;
+}
+
+/* Number of nodes per bucket. */
+#define CCMAP_K (CACHE_LINE_SIZE / sizeof(ccmap_node_t))
+
+/* A cuckoo hash bucket.  Designed to be cache-aligned and exactly one cache
+ * line long. */
+struct ccmap_bucket {
+/* Each node incudes both the hash (low 32-bits) and the count (high
+ * 32-bits), allowing readers always getting a consistent pair. */
+ccmap_node_t nodes[CCMAP_K];
+};
+BUILD_ASSERT_DECL(sizeof(struct ccmap_bucket) == CACHE_LINE_SIZE);
+
+/* Default maximum load factor (as a fraction of UINT32_MAX + 1) before
+ * enlarging a ccmap.  Reasonable values lie between about 75% and 93%.  
Smaller
+ * values waste memory; larger values increase the average insertion time. */
+#define CCMAP_MAX_LOAD ((uint32_t) (UINT32_MAX * .85))
+
+/* Default minimum load factor (as a fraction of UINT32_MAX + 1) before
+ * shrinking a ccmap.  Currently, the value is chosen to be 20%, this
+ * means ccmap will have a 40% load factor after shrink. */
+#define CCMAP_MIN_LOAD ((uint32_t) (UINT32_MAX * .20))
+
+/* The implementation of a concurrent hash map. */
+struct ccmap_impl 

[ovs-dev] [PATCH v2 3/6] classifier: Remove rare optimization case.

2016-04-22 Thread Jarno Rajahalme
This optimization applied when a staged lookup index would narrow down
to a single rule, which happens sometimes is simple test cases, but
presumably less often in more populated flow tables.  The result of
this optimization allowed a bit more general megaflows, but the bit
patterns produced were sometimes cryptic.  Finally, a later fix to a
more important performance problem does not allow for this
optimization any more, so remove it now.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c  | 75 +--
 tests/classifier.at   | 10 +++
 tests/ofproto-dpif.at | 14 +-
 3 files changed, 13 insertions(+), 86 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index a62a2bd..1557f6a 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1658,54 +1658,6 @@ find_match(const struct cls_subtable *subtable, 
cls_version_t version,
 return NULL;
 }
 
-/* Returns true if 'target' satisifies 'flow'/'mask', that is, if each bit
- * for which 'flow', for which 'mask' has a bit set, specifies a particular
- * value has the correct value in 'target'.
- *
- * This function is equivalent to miniflow_and_mask_matches_flow() but this
- * version fills in the mask bits in 'wc'. */
-static inline bool
-miniflow_and_mask_matches_flow_wc(const struct miniflow *flow,
-  const struct minimask *mask,
-  const struct flow *target,
-  struct flow_wildcards *wc)
-{
-const uint64_t *flowp = miniflow_get_values(flow);
-const uint64_t *maskp = miniflow_get_values(>masks);
-const uint64_t *target_u64 = (const uint64_t *)target;
-uint64_t *wc_u64 = (uint64_t *)>masks;
-uint64_t diff;
-size_t idx;
-map_t map;
-
-FLOWMAP_FOR_EACH_MAP (map, mask->masks.map) {
-MAP_FOR_EACH_INDEX(idx, map) {
-uint64_t msk = *maskp++;
-
-diff = (*flowp++ ^ target_u64[idx]) & msk;
-if (diff) {
-goto out;
-}
-
-/* Fill in the bits that were looked at. */
-wc_u64[idx] |= msk;
-}
-target_u64 += MAP_T_BITS;
-wc_u64 += MAP_T_BITS;
-}
-return true;
-
-out:
-/* Only unwildcard if none of the differing bits is already
- * exact-matched. */
-if (!(wc_u64[idx] & diff)) {
-/* Keep one bit of the difference.  The selected bit may be
- * different in big-endian v.s. little-endian systems. */
-wc_u64[idx] |= rightmost_1bit(diff);
-}
-return false;
-}
-
 static const struct cls_match *
 find_match_wc(const struct cls_subtable *subtable, cls_version_t version,
   const struct flow *flow, struct trie_ctx trie_ctx[CLS_MAX_TRIES],
@@ -1724,8 +1676,6 @@ find_match_wc(const struct cls_subtable *subtable, 
cls_version_t version,
 
 /* Try to finish early by checking fields in segments. */
 for (i = 0; i < subtable->n_indices; i++) {
-const struct cmap_node *inode;
-
 if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
 subtable->index_maps[i], flow, wc)) {
 /* 'wc' bits for the trie field set, now unwildcard the preceding
@@ -1740,32 +1690,9 @@ find_match_wc(const struct cls_subtable *subtable, 
cls_version_t version,
subtable->index_maps[i],
_offset, );
 
-inode = cmap_find(>indices[i], hash);
-if (!inode) {
+if (!cmap_find(>indices[i], hash)) {
 goto no_match;
 }
-
-/* If we have narrowed down to a single rule already, check whether
- * that rule matches.  Either way, we're done.
- *
- * (Rare) hash collisions may cause us to miss the opportunity for this
- * optimization. */
-if (!cmap_node_next(inode)) {
-const struct cls_match *head;
-
-ASSIGN_CONTAINER(head, inode - i, index_nodes);
-if (miniflow_and_mask_matches_flow_wc(>flow, >mask,
-  flow, wc)) {
-/* Return highest priority rule that is visible. */
-CLS_MATCH_FOR_EACH (rule, head) {
-if (OVS_LIKELY(cls_match_visible_in_version(rule,
-version))) {
-return rule;
-}
-}
-}
-return NULL;
-}
 }
 /* Trie check for the final range. */
 if (check_tries(trie_ctx, n_tries, subtable->trie_plen,
diff --git a/tests/classifier.at b/tests/classifier.at
index b110508..e56ba3a 100644
--- a/tests/classifier.at
+++ b/tests/classifier.at
@@ -49,7 +49,7 @@ Datapath actions: 1
 ])
 AT_CHECK([ovs-appctl ofproto/trace br0 

[ovs-dev] [PATCH v2 1/6] classifier: Remove redundant index.

2016-04-22 Thread Jarno Rajahalme
The test for figuring out if the last index had the same fields as the
actual rules map as broken, resulting into keeping an unnecessary
index around.

Signed-off-by: Jarno Rajahalme 
Acked-by: Ryan Moats 
Acked-by: Ben Pfaff 
---
 lib/classifier.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/lib/classifier.c b/lib/classifier.c
index 19e26b4..52723ca 100644
--- a/lib/classifier.c
+++ b/lib/classifier.c
@@ -1499,11 +1499,11 @@ insert_subtable(struct classifier *cls, const struct 
minimask *mask)
 /* Map for the final stage. */
 *CONST_CAST(struct flowmap *, >index_maps[index])
 = miniflow_get_map_in_range(>masks, prev, FLOW_U64S);
-/* Check if the final stage adds any bits,
- * and remove the last index if it doesn't. */
+/* Check if the final stage adds any bits. */
 if (index > 0) {
-if (flowmap_equal(subtable->index_maps[index],
-  subtable->index_maps[index - 1])) {
+if (flowmap_is_empty(subtable->index_maps[index])) {
+/* Remove the last index, as it has the same fields as the rules
+ * map. */
 --index;
 cmap_destroy(>indices[index]);
 }
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] util.h: Restore stdarg.h which is necessary for va_list

2016-04-22 Thread YAMAMOTO Takashi
Fixes a regression in commit b44ff8d826535025f4f8d12808c4ef36a7a8 .
("Misc cleanup with "util.h" header files")

Signed-off-by: YAMAMOTO Takashi 
---
 lib/util.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/util.h b/lib/util.h
index a908267..f631bdf 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -20,6 +20,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
2.5.4 (Apple Git-61)

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 15/15] netdev-dpdk: Use ->reconfigure() call to change rx/tx queues.

2016-04-22 Thread Daniele Di Proietto
This introduces in dpif-netdev and netdev-dpdk the first use for the
newly introduce reconfigure netdev call.

When a request to change the number of queues comes, netdev-dpdk will
remember this and notify the upper layer via
netdev_request_reconfigure().

The datapath, instead of periodically calling netdev_set_multiq(), can
detect this and call reconfigure().

This mechanism can also be used to:
* Automatically match the number of rxq with the one provided by qemu
  via the new_device callback.
* Provide a way to change the MTU of dpdk devices at runtime.
* Move a DPDK vhost device to the proper NUMA socket.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c |  69 +-
 lib/netdev-bsd.c  |   2 +-
 lib/netdev-dpdk.c | 195 ++
 lib/netdev-dummy.c|   2 +-
 lib/netdev-linux.c|   2 +-
 lib/netdev-provider.h |  23 +++---
 lib/netdev-vport.c|   2 +-
 lib/netdev.c  |  36 +++---
 lib/netdev.h  |   3 +-
 9 files changed, 160 insertions(+), 174 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7531c2f..a1fa2d7 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -257,8 +257,6 @@ struct dp_netdev_port {
 unsigned n_rxq; /* Number of elements in 'rxq' */
 struct netdev_rxq **rxq;
 char *type; /* Port type as requested by user. */
-int latest_requested_n_rxq; /* Latest requested from netdev number
-   of rx queues. */
 };
 
 /* Contained by struct dp_netdev_flow's 'stats' member.  */
@@ -1166,20 +1164,26 @@ port_create(const char *devname, const char *open_type, 
const char *type,
 /* There can only be ovs_numa_get_n_cores() pmd threads,
  * so creates a txq for each, and one extra for the non
  * pmd threads. */
-error = netdev_set_multiq(netdev, n_cores + 1,
-  netdev_requested_n_rxq(netdev));
+error = netdev_set_tx_multiq(netdev, n_cores + 1);
 if (error && (error != EOPNOTSUPP)) {
 VLOG_ERR("%s, cannot set multiq", devname);
 goto out;
 }
 }
+
+if (netdev_is_reconf_required(netdev)) {
+error = netdev_reconfigure(netdev);
+if (error) {
+goto out;
+}
+}
+
 port = xzalloc(sizeof *port);
 port->port_no = port_no;
 port->netdev = netdev;
 port->n_rxq = netdev_n_rxq(netdev);
 port->rxq = xcalloc(port->n_rxq, sizeof *port->rxq);
 port->type = xstrdup(type);
-port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev);
 
 for (i = 0; i < port->n_rxq; i++) {
 error = netdev_rxq_open(netdev, >rxq[i], i);
@@ -2467,27 +2471,6 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op 
**ops, size_t n_ops)
 }
 }
 
-/* Returns true if the configuration for rx queues is changed. */
-static bool
-pmd_n_rxq_changed(const struct dp_netdev *dp)
-{
-struct dp_netdev_port *port;
-
-ovs_mutex_lock(>port_mutex);
-HMAP_FOR_EACH (port, node, >ports) {
-int requested_n_rxq = netdev_requested_n_rxq(port->netdev);
-
-if (netdev_is_pmd(port->netdev)
-&& port->latest_requested_n_rxq != requested_n_rxq) {
-ovs_mutex_unlock(>port_mutex);
-return true;
-}
-}
-ovs_mutex_unlock(>port_mutex);
-
-return false;
-}
-
 static bool
 cmask_equals(const char *a, const char *b)
 {
@@ -2611,11 +2594,9 @@ static int
 port_reconfigure(struct dp_netdev_port *port)
 {
 struct netdev *netdev = port->netdev;
-int requested_n_rxq = netdev_requested_n_rxq(netdev);
 int i, err;
 
-if (!netdev_is_pmd(port->netdev)
-|| port->latest_requested_n_rxq != requested_n_rxq) {
+if (!netdev_is_reconf_required(netdev)) {
 return 0;
 }
 
@@ -2626,15 +2607,14 @@ port_reconfigure(struct dp_netdev_port *port)
 }
 port->n_rxq = 0;
 
-/* Sets the new rx queue config. */
-err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1,
-requested_n_rxq);
+/* Allows 'netdev' to apply the pending configuration changes. */
+err = netdev_reconfigure(netdev);
 if (err && (err != EOPNOTSUPP)) {
-VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u",
- netdev_get_name(port->netdev), requested_n_rxq);
+VLOG_ERR("Failed to set interface %s new configuration",
+ netdev_get_name(netdev));
 return err;
 }
-/* If the set_multiq() above succeeds, reopens the 'rxq's. */
+/* If the netdev_reconfigure( above succeeds, reopens the 'rxq's. */
 port->rxq = xrealloc(port->rxq, sizeof *port->rxq * netdev_n_rxq(netdev));
 for (i = 0; i < netdev_n_rxq(netdev); i++) {
 err = netdev_rxq_open(netdev, >rxq[i], i);
@@ -2678,6 +2658,22 @@ reconfigure_pmd_threads(struct dp_netdev *dp)
 

Re: [ovs-dev] [PATCH v8 08/16] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-22 Thread Daniele Di Proietto
Hi Ilya,

thanks for reporting this.  I've decided to handle tx port cache
in dp_netdev_set_pmds_on_numa() and dp_netdev_set_nonpmd().

I've sent an updated version here:

http://openvswitch.org/pipermail/dev/2016-April/070064.html

Thanks,

Daniele

On 22/04/2016 08:15, "Ilya Maximets"  wrote:

>Without this we will lost connection to non-pmd ports:
>--
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>index 26ef612..be1f291 100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -3359,9 +3359,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
> struct hmapx_node *node;
> 
> HMAP_FOR_EACH (port, node, >ports) {
>-if (netdev_is_pmd(port->netdev)) {
>-dp_netdev_add_port_to_pmds__(dp, port, _reload);
>-}
>+dp_netdev_add_port_to_pmds__(dp, port, _reload);
> }
> 
> HMAPX_FOR_EACH (node, _reload) {
>--
>
>Best regards, Ilya Maximets.
>
>On 20.04.2016 01:28, diproiettod at vmware.com (Daniele Di Proietto)
>wrote:
>> A future commit will stop using RCU for 'dp->ports' and use a mutex for
>> reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
>> is called in the fast path, this commit introduces a pmd thread local
>> cache of ports.
>> 
>> The downside is that every port add/remove now needs to synchronize with
>> every pmd thread.
>> 
>> Among the advantages, keeping a per thread port mapping could allow
>> greater control over the txq assigment.
>> 
>> Signed-off-by: Daniele Di Proietto 
>> ---
>>  lib/dpif-netdev.c | 249
>>+++---
>>  1 file changed, 179 insertions(+), 70 deletions(-)
>> 
>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
>> index cedaf39..bd2249e 100644
>> --- a/lib/dpif-netdev.c
>> +++ b/lib/dpif-netdev.c
>> @@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
>>   *
>>   *dp_netdev_mutex (global)
>>   *port_mutex
>> + *non_pmd_mutex
>>   */
>>  struct dp_netdev {
>>  const struct dpif_class *const class;
>> @@ -379,6 +380,13 @@ struct rxq_poll {
>>  struct ovs_list node;
>>  };
>>  
>> +/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or
>>'tx_ports'. */
>> +struct tx_port {
>> +odp_port_t port_no;
>> +struct netdev *netdev;
>> +struct hmap_node node;
>> +};
>> +
>>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to
>>eliminate
>>   * the performance overhead of interrupt processing.  Therefore netdev
>>can
>>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
>> @@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
>>  
>>  /* Per thread exact-match cache.  Note, the instance for cpu core
>>   * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
>> - * need to be protected (e.g. by 'dp_netdev_mutex').  All other
>> - * instances will only be accessed by its own pmd thread. */
>> + * need to be protected by 'non_pmd_mutex'.  Every other instance
>> + * will only be accessed by its own pmd thread. */
>>  struct emc_cache flow_cache;
>>  
>>  /* Classifier and Flow-Table.
>> @@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
>>  atomic_int tx_qid;  /* Queue id used by this pmd
>>thread to
>>   * send packets on all netdevs */
>>  
>> -struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
>> +struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and
>>'tx_ports'. */
>>  /* List of rx queues to poll. */
>>  struct ovs_list poll_list OVS_GUARDED;
>> -int poll_cnt;   /* Number of elemints in
>>poll_list. */
>> +/* Number of elements in 'poll_list' */
>> +int poll_cnt;
>> +/* Map of 'tx_port's used for transmission.  Written by the main
>>thread,
>> + * read by the pmd thread. */
>> +struct hmap tx_ports OVS_GUARDED;
>> +
>> +/* Map of 'tx_port' used in the fast path. This is a thread-local
>>copy of
>> + * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be
>>accessed
>> + * by multiple threads, and thusly need to be protected by
>>'non_pmd_mutex'.
>> + * Every other instance will only be accessed by its own pmd
>>thread. */
>> +struct hmap port_cache;
>>  
>>  /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>>   * The main thread keeps 'stats_zero' and 'cycles_zero' as base
>> @@ -494,7 +512,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct
>>cmap_position *pos);
>>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>>  static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int
>>numa_id);
>>  static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int
>>numa_id);
>> -static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread
>>*pmd);
>> +static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread
>>*pmd);
>>  static void 

[ovs-dev] [PATCH v9 07/15] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-22 Thread Daniele Di Proietto
A future commit will stop using RCU for 'dp->ports' and use a mutex for
reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
is called in the fast path, this commit introduces a pmd thread local
cache of ports.

The downside is that every port add/remove now needs to synchronize with
every pmd thread.

Among the advantages, keeping a per thread port mapping could allow
greater control over the txq assigment.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 348 ++
 1 file changed, 244 insertions(+), 104 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index fbd23cf..a96bebf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
  *
  *dp_netdev_mutex (global)
  *port_mutex
+ *non_pmd_mutex
  */
 struct dp_netdev {
 const struct dpif_class *const class;
@@ -379,6 +380,13 @@ struct rxq_poll {
 struct ovs_list node;
 };
 
+/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
+struct tx_port {
+odp_port_t port_no;
+struct netdev *netdev;
+struct hmap_node node;
+};
+
 /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
  * the performance overhead of interrupt processing.  Therefore netdev can
  * not implement rx-wait for these devices.  dpif-netdev needs to poll
@@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
 
 /* Per thread exact-match cache.  Note, the instance for cpu core
  * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
- * need to be protected (e.g. by 'dp_netdev_mutex').  All other
- * instances will only be accessed by its own pmd thread. */
+ * need to be protected by 'non_pmd_mutex'.  Every other instance
+ * will only be accessed by its own pmd thread. */
 struct emc_cache flow_cache;
 
 /* Classifier and Flow-Table.
@@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
 atomic_int tx_qid;  /* Queue id used by this pmd thread to
  * send packets on all netdevs */
 
-struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
+struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. */
 /* List of rx queues to poll. */
 struct ovs_list poll_list OVS_GUARDED;
-int poll_cnt;   /* Number of elemints in poll_list. */
+/* Number of elements in 'poll_list' */
+int poll_cnt;
+/* Map of 'tx_port's used for transmission.  Written by the main thread,
+ * read by the pmd thread. */
+struct hmap tx_ports OVS_GUARDED;
+
+/* Map of 'tx_port' used in the fast path. This is a thread-local copy of
+ * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
+ * by multiple threads, and thusly need to be protected by 'non_pmd_mutex'.
+ * Every other instance will only be accessed by its own pmd thread. */
+struct hmap port_cache;
 
 /* Only a pmd thread can write on its own 'cycles' and 'stats'.
  * The main thread keeps 'stats_zero' and 'cycles_zero' as base
@@ -494,20 +512,24 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct 
cmap_position *pos);
 static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
 static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
-static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
+static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
  struct dp_netdev_port *port);
-static void
-dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct dp_netdev_port *port);
-static void
-dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
- struct dp_netdev_port *port, struct netdev_rxq *rx);
+static void dp_netdev_add_port_to_pmds(struct dp_netdev *dp,
+   struct dp_netdev_port *port);
+static void dp_netdev_add_port_tx_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_port *port);
+static void dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread *pmd,
+ struct dp_netdev_port *port,
+ struct netdev_rxq *rx);
 static struct dp_netdev_pmd_thread *
 dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
 static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
+static void pmd_load_cached_ports(struct dp_netdev_pmd_thread *pmd)
+

[ovs-dev] [PATCH v9 12/15] dpif-netdev: Change pmd thread configuration in dpif_netdev_run().

2016-04-22 Thread Daniele Di Proietto
Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c   | 144 ++--
 lib/dpif-provider.h |   3 +-
 2 files changed, 84 insertions(+), 63 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5c4d6f2..e5a0433 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -224,7 +224,9 @@ struct dp_netdev {
 ovsthread_key_t per_pmd_key;
 
 /* Cpu mask for pin of pmd threads. */
+char *requested_pmd_cmask;
 char *pmd_cmask;
+
 uint64_t last_tnl_conf_seq;
 };
 
@@ -2465,18 +2467,17 @@ dpif_netdev_operate(struct dpif *dpif, struct dpif_op 
**ops, size_t n_ops)
 }
 }
 
-/* Returns true if the configuration for rx queues or cpu mask
- * is changed. */
+/* Returns true if the configuration for rx queues is changed. */
 static bool
-pmd_config_changed(const struct dp_netdev *dp, const char *cmask)
+pmd_n_rxq_changed(const struct dp_netdev *dp)
 {
 struct dp_netdev_port *port;
 
 ovs_mutex_lock(>port_mutex);
 HMAP_FOR_EACH (port, node, >ports) {
-struct netdev *netdev = port->netdev;
-int requested_n_rxq = netdev_requested_n_rxq(netdev);
-if (netdev_is_pmd(netdev)
+int requested_n_rxq = netdev_requested_n_rxq(port->netdev);
+
+if (netdev_is_pmd(port->netdev)
 && port->latest_requested_n_rxq != requested_n_rxq) {
 ovs_mutex_unlock(>port_mutex);
 return true;
@@ -2484,69 +2485,29 @@ pmd_config_changed(const struct dp_netdev *dp, const 
char *cmask)
 }
 ovs_mutex_unlock(>port_mutex);
 
-if (dp->pmd_cmask != NULL && cmask != NULL) {
-return strcmp(dp->pmd_cmask, cmask);
-} else {
-return (dp->pmd_cmask != NULL || cmask != NULL);
+return false;
+}
+
+static bool
+cmask_equals(const char *a, const char *b)
+{
+if (a && b) {
+return !strcmp(a, b);
 }
+
+return a == NULL && b == NULL;
 }
 
-/* Resets pmd threads if the configuration for 'rxq's or cpu mask changes. */
+/* Changes the number or the affinity of pmd threads.  The changes are actually
+ * applied in dpif_netdev_run(). */
 static int
 dpif_netdev_pmd_set(struct dpif *dpif, const char *cmask)
 {
 struct dp_netdev *dp = get_dp_netdev(dpif);
 
-if (pmd_config_changed(dp, cmask)) {
-struct dp_netdev_port *port;
-
-dp_netdev_destroy_all_pmds(dp);
-
-ovs_mutex_lock(>port_mutex);
-HMAP_FOR_EACH (port, node, >ports) {
-struct netdev *netdev = port->netdev;
-int requested_n_rxq = netdev_requested_n_rxq(netdev);
-if (netdev_is_pmd(port->netdev)
-&& port->latest_requested_n_rxq != requested_n_rxq) {
-int i, err;
-
-/* Closes the existing 'rxq's. */
-for (i = 0; i < netdev_n_rxq(port->netdev); i++) {
-netdev_rxq_close(port->rxq[i]);
-port->rxq[i] = NULL;
-}
-port->n_rxq = 0;
-
-/* Sets the new rx queue config.  */
-err = netdev_set_multiq(port->netdev,
-ovs_numa_get_n_cores() + 1,
-requested_n_rxq);
-if (err && (err != EOPNOTSUPP)) {
-VLOG_ERR("Failed to set dpdk interface %s rx_queue to:"
- " %u", netdev_get_name(port->netdev),
- requested_n_rxq);
-ovs_mutex_unlock(>port_mutex);
-return err;
-}
-port->latest_requested_n_rxq = requested_n_rxq;
-/* If the set_multiq() above succeeds, reopens the 'rxq's. */
-port->n_rxq = netdev_n_rxq(port->netdev);
-port->rxq = xrealloc(port->rxq, sizeof *port->rxq * 
port->n_rxq);
-for (i = 0; i < port->n_rxq; i++) {
-netdev_rxq_open(port->netdev, >rxq[i], i);
-}
-}
-}
-/* Reconfigures the cpu mask. */
-ovs_numa_set_cpu_mask(cmask);
-free(dp->pmd_cmask);
-dp->pmd_cmask = cmask ? xstrdup(cmask) : NULL;
-
-/* Restores the non-pmd. */
-dp_netdev_set_nonpmd(dp);
-/* Restores all pmd threads. */
-dp_netdev_reset_pmd_threads(dp);
-ovs_mutex_unlock(>port_mutex);
+if (!cmask_equals(dp->requested_pmd_cmask, cmask)) {
+free(dp->requested_pmd_cmask);
+dp->requested_pmd_cmask = cmask ? xstrdup(cmask) : NULL;
 }
 
 return 0;
@@ -2646,6 +2607,59 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread 
*pmd,
 }
 }
 
+static void
+reconfigure_pmd_threads(struct dp_netdev *dp)
+OVS_REQUIRES(dp->port_mutex)
+{
+struct dp_netdev_port *port;
+
+dp_netdev_destroy_all_pmds(dp);
+
+HMAP_FOR_EACH (port, node, >ports) {
+struct netdev *netdev = port->netdev;
+int requested_n_rxq = 

[ovs-dev] [PATCH v9 09/15] dpif-netdev: Use hmap for ports.

2016-04-22 Thread Daniele Di Proietto
netdev objects are hard to use with RCU, because it's not possible to
split removal and reclamation.  Postponing the removal means that the
port is not removed and cannot be readded immediately.  Waiting for
reclamation means introducing a quiescent state, and that may introduce
subtle bugs, due to the RCU model we use in userspace.

This commit changes the port container from cmap to hmap.  'port_mutex'
must be held by readers and writers.  This shouldn't have performace
impact, as readers in the fast path use a thread local cache.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 111 --
 1 file changed, 67 insertions(+), 44 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a96bebf..5c4d6f2 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -195,9 +195,10 @@ struct dp_netdev {
 
 /* Ports.
  *
- * Protected by RCU.  Take the mutex to add or remove ports. */
+ * Any lookup into 'ports' or any access to the dp_netdev_ports found
+ * through 'ports' requires taking 'port_mutex'. */
 struct ovs_mutex port_mutex;
-struct cmap ports;
+struct hmap ports;
 struct seq *port_seq;   /* Incremented whenever a port changes. */
 
 /* Protects access to ofproto-dpif-upcall interface during revalidator
@@ -228,7 +229,8 @@ struct dp_netdev {
 };
 
 static struct dp_netdev_port *dp_netdev_lookup_port(const struct dp_netdev *dp,
-odp_port_t);
+odp_port_t)
+OVS_REQUIRES(dp->port_mutex);
 
 enum dp_stat_type {
 DP_STAT_EXACT_HIT,  /* Packets that had an exact match (emc). */
@@ -248,7 +250,7 @@ enum pmd_cycles_counter_type {
 struct dp_netdev_port {
 odp_port_t port_no;
 struct netdev *netdev;
-struct cmap_node node;  /* Node in dp_netdev's 'ports'. */
+struct hmap_node node;  /* Node in dp_netdev's 'ports'. */
 struct netdev_saved_flags *sf;
 unsigned n_rxq; /* Number of elements in 'rxq' */
 struct netdev_rxq **rxq;
@@ -476,9 +478,11 @@ struct dpif_netdev {
 };
 
 static int get_port_by_number(struct dp_netdev *dp, odp_port_t port_no,
-  struct dp_netdev_port **portp);
+  struct dp_netdev_port **portp)
+OVS_REQUIRES(dp->port_mutex);
 static int get_port_by_name(struct dp_netdev *dp, const char *devname,
-struct dp_netdev_port **portp);
+struct dp_netdev_port **portp)
+OVS_REQUIRES(dp->port_mutex);
 static void dp_netdev_free(struct dp_netdev *)
 OVS_REQUIRES(dp_netdev_mutex);
 static int do_add_port(struct dp_netdev *dp, const char *devname,
@@ -504,14 +508,17 @@ static void dp_netdev_configure_pmd(struct 
dp_netdev_pmd_thread *pmd,
 struct dp_netdev *dp, unsigned core_id,
 int numa_id);
 static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
-static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
+static void dp_netdev_set_nonpmd(struct dp_netdev *dp)
+OVS_REQUIRES(dp->port_mutex);
+
 static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
   unsigned core_id);
 static struct dp_netdev_pmd_thread *
 dp_netdev_pmd_get_next(struct dp_netdev *dp, struct cmap_position *pos);
 static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
 static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
-static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
+static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id)
+OVS_REQUIRES(dp->port_mutex);
 static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
  struct dp_netdev_port *port);
@@ -524,7 +531,8 @@ static void dp_netdev_add_rxq_to_pmd(struct 
dp_netdev_pmd_thread *pmd,
  struct netdev_rxq *rx);
 static struct dp_netdev_pmd_thread *
 dp_netdev_less_loaded_pmd_on_numa(struct dp_netdev *dp, int numa_id);
-static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp);
+static void dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
+OVS_REQUIRES(dp->port_mutex);
 static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread *pmd);
@@ -915,7 +923,7 @@ create_dp_netdev(const char *name, const struct dpif_class 
*class,
 atomic_flag_clear(>destroyed);
 
 ovs_mutex_init(>port_mutex);
-cmap_init(>ports);
+hmap_init(>ports);
 dp->port_seq = seq_create();
 fat_rwlock_init(>upcall_rwlock);
 
@@ -928,9 

[ovs-dev] [PATCH v9 11/15] ofproto-dpif: Call dpif_poll_threads_set() before dpif_run().

2016-04-22 Thread Daniele Di Proietto
An upcoming commit will make dpif_poll_threads_set() record the
requested configuration and dpif_run() apply it, so it makes sense to
change the order.

Signed-off-by: Daniele Di Proietto 
Tested-by: Ilya Maximets 
Acked-by: Ilya Maximets 
Acked-by: Mark Kavanagh 
---
 ofproto/ofproto-dpif.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
index 747482c..2aa12b7 100644
--- a/ofproto/ofproto-dpif.c
+++ b/ofproto/ofproto-dpif.c
@@ -534,6 +534,8 @@ type_run(const char *type)
 return 0;
 }
 
+/* This must be called before dpif_run() */
+dpif_poll_threads_set(backer->dpif, pmd_cpu_mask);
 
 if (dpif_run(backer->dpif)) {
 backer->need_revalidate = REV_RECONFIGURE;
@@ -562,8 +564,6 @@ type_run(const char *type)
 udpif_set_threads(backer->udpif, n_handlers, n_revalidators);
 }
 
-dpif_poll_threads_set(backer->dpif, pmd_cpu_mask);
-
 if (backer->need_revalidate) {
 struct ofproto_dpif *ofproto;
 struct simap_node *node;
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 10/15] ovs-thread: Do not quiesce in ovs_mutex_cond_wait().

2016-04-22 Thread Daniele Di Proietto
ovs_mutex_cond_wait() is used in many functions in dpif-netdev to
synchronize with pmd threads, but we can't guarantee that the callers do
not hold RCU references, so it's better to avoid quiescing.

In system_stats_thread_func() the code relied on ovs_mutex_cond_wait()
to introduce a quiescent state, so explicit calls to
ovsrcu_quiesce_start() and ovsrcu_quiesce_end() are added there.

Signed-off-by: Daniele Di Proietto 
---
 lib/ovs-rcu.h   | 3 +--
 lib/ovs-thread.c| 2 --
 vswitchd/system-stats.c | 6 ++
 3 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h
index 600be0b..8750ead 100644
--- a/lib/ovs-rcu.h
+++ b/lib/ovs-rcu.h
@@ -42,8 +42,7 @@
  * A "quiescent state" is a time at which a thread holds no pointers to memory
  * that is managed by RCU; that is, when the thread is known not to reference
  * memory that might be an old version of some object freed via RCU.  For
- * example, poll_block() includes a quiescent state, as does
- * ovs_mutex_cond_wait().
+ * example, poll_block() includes a quiescent state.
  *
  * The following functions manage the recognition of quiescent states:
  *
diff --git a/lib/ovs-thread.c b/lib/ovs-thread.c
index 3c065cf..26dd928 100644
--- a/lib/ovs-thread.c
+++ b/lib/ovs-thread.c
@@ -253,9 +253,7 @@ ovs_mutex_cond_wait(pthread_cond_t *cond, const struct 
ovs_mutex *mutex_)
 struct ovs_mutex *mutex = CONST_CAST(struct ovs_mutex *, mutex_);
 int error;
 
-ovsrcu_quiesce_start();
 error = pthread_cond_wait(cond, >lock);
-ovsrcu_quiesce_end();
 
 if (OVS_UNLIKELY(error)) {
 ovs_abort(error, "pthread_cond_wait failed");
diff --git a/vswitchd/system-stats.c b/vswitchd/system-stats.c
index df4971e..129f0cf 100644
--- a/vswitchd/system-stats.c
+++ b/vswitchd/system-stats.c
@@ -37,6 +37,7 @@
 #include "json.h"
 #include "latch.h"
 #include "openvswitch/ofpbuf.h"
+#include "ovs-rcu.h"
 #include "ovs-thread.h"
 #include "poll-loop.h"
 #include "shash.h"
@@ -615,7 +616,12 @@ system_stats_thread_func(void *arg OVS_UNUSED)
 
 ovs_mutex_lock();
 while (!enabled) {
+/* The thread is sleeping, potentially for a long time, and it's
+ * not holding RCU protected references, so it makes sense to
+ * quiesce */
+ovsrcu_quiesce_start();
 ovs_mutex_cond_wait(, );
+ovsrcu_quiesce_end();
 }
 ovs_mutex_unlock();
 
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 04/15] dpif-netdev: Add functions to modify rxq without reloading pmd threads.

2016-04-22 Thread Daniele Di Proietto
This commit introduces some functions to add/remove rxqs from pmd
threads without reloading them.  They will be used by next commits.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 77 ---
 1 file changed, 56 insertions(+), 21 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index a224b43..c3be4eb 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -495,8 +495,6 @@ static void dp_netdev_destroy_all_pmds(struct dp_netdev 
*dp);
 static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
 static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
-static void dp_netdev_del_port_from_pmd(struct dp_netdev_port *port,
-struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
  struct dp_netdev_port *port);
 static void
@@ -3002,11 +3000,11 @@ dp_netdev_pmd_clear_poll_list(struct 
dp_netdev_pmd_thread *pmd)
 ovs_mutex_unlock(>poll_mutex);
 }
 
-/* Deletes all rx queues of 'port' from poll_list of pmd thread and
- * reloads it if poll_list was changed. */
-static void
-dp_netdev_del_port_from_pmd(struct dp_netdev_port *port,
-struct dp_netdev_pmd_thread *pmd)
+/* Deletes all rx queues of 'port' from poll_list of pmd thread.  Returns true
+ * if 'port' was found in 'pmd' (therefore a restart is required). */
+static bool
+dp_netdev_del_port_from_pmd__(struct dp_netdev_port *port,
+  struct dp_netdev_pmd_thread *pmd)
 {
 struct rxq_poll *poll, *next;
 bool found = false;
@@ -3021,8 +3019,30 @@ dp_netdev_del_port_from_pmd(struct dp_netdev_port *port,
 }
 }
 ovs_mutex_unlock(>poll_mutex);
-if (found) {
-dp_netdev_reload_pmd__(pmd);
+
+return found;
+}
+
+/* Deletes all rx queues of 'port' from all pmd threads.  The pmd threads that
+ * need to be restarted are inserted in 'to_reload'. */
+static void
+dp_netdev_del_port_from_all_pmds__(struct dp_netdev *dp,
+   struct dp_netdev_port *port,
+   struct hmapx *to_reload)
+{
+int numa_id = netdev_get_numa_id(port->netdev);
+struct dp_netdev_pmd_thread *pmd;
+
+CMAP_FOR_EACH (pmd, node, >poll_threads) {
+if (pmd->numa_id == numa_id) {
+bool found;
+
+found = dp_netdev_del_port_from_pmd__(port, pmd);
+
+if (found) {
+hmapx_add(to_reload, pmd);
+}
+   }
 }
 }
 
@@ -3032,16 +3052,21 @@ static void
 dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
  struct dp_netdev_port *port)
 {
-int numa_id = netdev_get_numa_id(port->netdev);
 struct dp_netdev_pmd_thread *pmd;
+struct hmapx to_reload = HMAPX_INITIALIZER(_reload);
+struct hmapx_node *node;
 
-CMAP_FOR_EACH (pmd, node, >poll_threads) {
-if (pmd->numa_id == numa_id) {
-dp_netdev_del_port_from_pmd(port, pmd);
-   }
+dp_netdev_del_port_from_all_pmds__(dp, port, _reload);
+
+HMAPX_FOR_EACH (node, _reload) {
+pmd = (struct dp_netdev_pmd_thread *) node->data;
+dp_netdev_reload_pmd__(pmd);
 }
+
+hmapx_destroy(_reload);
 }
 
+
 /* Returns PMD thread from this numa node with fewer rx queues to poll.
  * Returns NULL if there is no PMD threads on this numa node.
  * Can be called safely only by main thread. */
@@ -3077,18 +3102,16 @@ dp_netdev_add_rxq_to_pmd(struct dp_netdev_pmd_thread 
*pmd,
 pmd->poll_cnt++;
 }
 
-/* Distributes all rx queues of 'port' between all PMD threads and reloads
- * them if needed. */
+/* Distributes all rx queues of 'port' between all PMD threads in 'dp'. The
+ * pmd threads that need to be restarted are inserted in 'to_reload'. */
 static void
-dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct dp_netdev_port *port)
+dp_netdev_add_port_to_pmds__(struct dp_netdev *dp, struct dp_netdev_port *port,
+ struct hmapx *to_reload)
 {
 int numa_id = netdev_get_numa_id(port->netdev);
 struct dp_netdev_pmd_thread *pmd;
-struct hmapx to_reload;
-struct hmapx_node *node;
 int i;
 
-hmapx_init(_reload);
 /* Cannot create pmd threads for invalid numa node. */
 ovs_assert(ovs_numa_numa_id_is_valid(numa_id));
 
@@ -3105,8 +3128,20 @@ dp_netdev_add_port_to_pmds(struct dp_netdev *dp, struct 
dp_netdev_port *port)
 dp_netdev_add_rxq_to_pmd(pmd, port, port->rxq[i]);
 ovs_mutex_unlock(>poll_mutex);
 
-hmapx_add(_reload, pmd);
+hmapx_add(to_reload, pmd);
 }
+}
+
+/* Distributes all rx queues of 'port' between all PMD threads in 'dp' and
+ * reloads them, if needed. */
+static void
+dp_netdev_add_port_to_pmds(struct 

[ovs-dev] [PATCH v9 13/15] dpif-netdev: Handle errors in reconfigure_pmd_threads().

2016-04-22 Thread Daniele Di Proietto
Errors returned by netdev_set_multiq() and netdev_rxq_open() weren't
handled properly in reconfigure_pmd_threads().  In case of error now we
remove the port from the datapath.

Also, part of the code is moved in a new function, port_reconfigure().

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 78 ++-
 1 file changed, 48 insertions(+), 30 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index e5a0433..7531c2f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2607,44 +2607,62 @@ dp_netdev_process_rxq_port(struct dp_netdev_pmd_thread 
*pmd,
 }
 }
 
+static int
+port_reconfigure(struct dp_netdev_port *port)
+{
+struct netdev *netdev = port->netdev;
+int requested_n_rxq = netdev_requested_n_rxq(netdev);
+int i, err;
+
+if (!netdev_is_pmd(port->netdev)
+|| port->latest_requested_n_rxq != requested_n_rxq) {
+return 0;
+}
+
+/* Closes the existing 'rxq's. */
+for (i = 0; i < port->n_rxq; i++) {
+netdev_rxq_close(port->rxq[i]);
+port->rxq[i] = NULL;
+}
+port->n_rxq = 0;
+
+/* Sets the new rx queue config. */
+err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1,
+requested_n_rxq);
+if (err && (err != EOPNOTSUPP)) {
+VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u",
+ netdev_get_name(port->netdev), requested_n_rxq);
+return err;
+}
+/* If the set_multiq() above succeeds, reopens the 'rxq's. */
+port->rxq = xrealloc(port->rxq, sizeof *port->rxq * netdev_n_rxq(netdev));
+for (i = 0; i < netdev_n_rxq(netdev); i++) {
+err = netdev_rxq_open(netdev, >rxq[i], i);
+if (err) {
+return err;
+}
+port->n_rxq++;
+}
+
+return 0;
+}
+
 static void
 reconfigure_pmd_threads(struct dp_netdev *dp)
 OVS_REQUIRES(dp->port_mutex)
 {
-struct dp_netdev_port *port;
+struct dp_netdev_port *port, *next;
 
 dp_netdev_destroy_all_pmds(dp);
 
-HMAP_FOR_EACH (port, node, >ports) {
-struct netdev *netdev = port->netdev;
-int requested_n_rxq = netdev_requested_n_rxq(netdev);
-if (netdev_is_pmd(port->netdev)
-&& port->latest_requested_n_rxq != requested_n_rxq) {
-int i, err;
+HMAP_FOR_EACH_SAFE (port, next, node, >ports) {
+int err;
 
-/* Closes the existing 'rxq's. */
-for (i = 0; i < port->n_rxq; i++) {
-netdev_rxq_close(port->rxq[i]);
-port->rxq[i] = NULL;
-}
-port->n_rxq = 0;
-
-/* Sets the new rx queue config. */
-err = netdev_set_multiq(port->netdev, ovs_numa_get_n_cores() + 1,
-requested_n_rxq);
-if (err && (err != EOPNOTSUPP)) {
-VLOG_ERR("Failed to set dpdk interface %s rx_queue to: %u",
- netdev_get_name(port->netdev),
- requested_n_rxq);
-return;
-}
-port->latest_requested_n_rxq = requested_n_rxq;
-/* If the set_multiq() above succeeds, reopens the 'rxq's. */
-port->n_rxq = netdev_n_rxq(port->netdev);
-port->rxq = xrealloc(port->rxq, sizeof *port->rxq * port->n_rxq);
-for (i = 0; i < port->n_rxq; i++) {
-netdev_rxq_open(port->netdev, >rxq[i], i);
-}
+err = port_reconfigure(port);
+if (err) {
+hmap_remove(>ports, >node);
+seq_change(dp->port_seq);
+port_destroy(port);
 }
 }
 /* Reconfigures the cpu mask. */
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 06/15] hmap: Add HMAP_FOR_EACH_POP.

2016-04-22 Thread Daniele Di Proietto
Makes popping each member of the hmap a bit easier.

Signed-off-by: Daniele Di Proietto 
---
 lib/cfm.c|  5 ++---
 lib/hmap.h   | 20 
 lib/id-pool.c|  5 ++---
 lib/learning-switch.c|  5 ++---
 lib/netdev-linux.c   |  5 ++---
 lib/odp-util.c   |  7 +++
 ofproto/bond.c   | 10 --
 ofproto/in-band.c|  5 ++---
 ofproto/ofproto-dpif-ipfix.c |  5 ++---
 ofproto/ofproto-dpif-xlate.c |  5 ++---
 ofproto/ofproto.c|  5 ++---
 ofproto/pinsched.c   |  5 ++---
 ovn/controller-vtep/vtep.c   |  5 ++---
 ovn/controller/encaps.c  |  5 ++---
 ovn/controller/lport.c   |  5 ++---
 ovn/controller/ofctrl.c  |  5 ++---
 ovn/controller/physical.c|  4 +---
 ovn/controller/pinctrl.c |  5 ++---
 ovn/lib/expr.c   |  5 ++---
 ovn/northd/ovn-northd.c  | 10 --
 ovsdb/monitor.c  |  5 ++---
 ovsdb/row.c  |  5 ++---
 tests/library.at |  2 +-
 tests/test-hmap.c| 42 ++
 24 files changed, 109 insertions(+), 71 deletions(-)

diff --git a/lib/cfm.c b/lib/cfm.c
index cf1f725..fb077de 100644
--- a/lib/cfm.c
+++ b/lib/cfm.c
@@ -374,7 +374,7 @@ cfm_create(const struct netdev *netdev) OVS_EXCLUDED(mutex)
 void
 cfm_unref(struct cfm *cfm) OVS_EXCLUDED(mutex)
 {
-struct remote_mp *rmp, *rmp_next;
+struct remote_mp *rmp;
 
 if (!cfm) {
 return;
@@ -389,8 +389,7 @@ cfm_unref(struct cfm *cfm) OVS_EXCLUDED(mutex)
 hmap_remove(all_cfms, >hmap_node);
 ovs_mutex_unlock();
 
-HMAP_FOR_EACH_SAFE (rmp, rmp_next, node, >remote_mps) {
-hmap_remove(>remote_mps, >node);
+HMAP_FOR_EACH_POP (rmp, node, >remote_mps) {
 free(rmp);
 }
 
diff --git a/lib/hmap.h b/lib/hmap.h
index 53e75cc..f3da79e 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -193,6 +193,26 @@ bool hmap_contains(const struct hmap *, const struct 
hmap_node *);
  (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL); \
  ASSIGN_CONTAINER(NODE, hmap_next(HMAP, &(NODE)->MEMBER), MEMBER))
 
+static inline struct hmap_node *
+hmap_pop_helper__(struct hmap *hmap, size_t *bucket) {
+
+for (; *bucket <= hmap->mask; (*bucket)++) {
+struct hmap_node *node = hmap->buckets[*bucket];
+
+if (node) {
+hmap_remove(hmap, node);
+return node;
+}
+}
+
+return NULL;
+}
+
+#define HMAP_FOR_EACH_POP(NODE, MEMBER, HMAP)   \
+for (size_t bucket__ = 0;   \
+ INIT_CONTAINER(NODE, hmap_pop_helper__(HMAP, __), MEMBER),  \
+ (NODE != OBJECT_CONTAINING(NULL, NODE, MEMBER)) || (NODE = NULL);)
+
 static inline struct hmap_node *hmap_first(const struct hmap *);
 static inline struct hmap_node *hmap_next(const struct hmap *,
   const struct hmap_node *);
diff --git a/lib/id-pool.c b/lib/id-pool.c
index 6b93d37..f32c008 100644
--- a/lib/id-pool.c
+++ b/lib/id-pool.c
@@ -69,10 +69,9 @@ id_pool_init(struct id_pool *pool, uint32_t base, uint32_t 
n_ids)
 static void
 id_pool_uninit(struct id_pool *pool)
 {
-struct id_node *id_node, *next;
+struct id_node *id_node;
 
-HMAP_FOR_EACH_SAFE(id_node, next, node, >map) {
-hmap_remove(>map, _node->node);
+HMAP_FOR_EACH_POP(id_node, node, >map) {
 free(id_node);
 }
 
diff --git a/lib/learning-switch.c b/lib/learning-switch.c
index c69ca4c..b420fe5 100644
--- a/lib/learning-switch.c
+++ b/lib/learning-switch.c
@@ -269,11 +269,10 @@ void
 lswitch_destroy(struct lswitch *sw)
 {
 if (sw) {
-struct lswitch_port *node, *next;
+struct lswitch_port *node;
 
 rconn_destroy(sw->rconn);
-HMAP_FOR_EACH_SAFE (node, next, hmap_node, >queue_numbers) {
-hmap_remove(>queue_numbers, >hmap_node);
+HMAP_FOR_EACH_POP (node, hmap_node, >queue_numbers) {
 free(node);
 }
 shash_destroy(>queue_names);
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index a2b6b8a..2953964 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -3851,10 +3851,9 @@ static void
 htb_tc_destroy(struct tc *tc)
 {
 struct htb *htb = CONTAINER_OF(tc, struct htb, tc);
-struct htb_class *hc, *next;
+struct htb_class *hc;
 
-HMAP_FOR_EACH_SAFE (hc, next, tc_queue.hmap_node, >tc.queues) {
-hmap_remove(>tc.queues, >tc_queue.hmap_node);
+HMAP_FOR_EACH_POP (hc, tc_queue.hmap_node, >tc.queues) {
 free(hc);
 }
 tc_destroy(tc);
diff --git a/lib/odp-util.c b/lib/odp-util.c
index 10fb6c2..e0a1ad4 100644
--- a/lib/odp-util.c
+++ b/lib/odp-util.c
@@ -2081,10 +2081,9 @@ odp_portno_names_get(const struct hmap *portno_names, 
odp_port_t port_no)
 void
 odp_portno_names_destroy(struct hmap *portno_names)
 {
-struct 

[ovs-dev] [PATCH v9 14/15] netdev: Add reconfigure request mechanism.

2016-04-22 Thread Daniele Di Proietto
A netdev provider, especially a PMD provider (like netdev DPDK) might
not be able to change some of its parameters (such as MTU, or number of
queues) without stopping everything and restarting.

This commit introduces a mechanism that allows a netdev provider to
request a restart (netdev_request_reconfigure()).  The upper layer can
be notified via netdev_wait_reconf_required() and
netdev_is_reconf_required().  After closing all the rxqs the upper layer
can finally call netdev_reconfigure(), to make sure that the new
configuration is in place.

This will be used by next commit to reconfigure rx and tx queues in
netdev-dpdk.

Signed-off-by: Daniele Di Proietto 
Tested-by: Ilya Maximets 
Acked-by: Ilya Maximets 
Acked-by: Mark Kavanagh 
---
 lib/netdev-bsd.c  |  1 +
 lib/netdev-dpdk.c |  1 +
 lib/netdev-dummy.c|  1 +
 lib/netdev-linux.c|  1 +
 lib/netdev-provider.h | 27 ++-
 lib/netdev-vport.c|  1 +
 lib/netdev.c  | 39 +++
 lib/netdev.h  |  4 
 8 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c
index 49c05f4..32e8f74 100644
--- a/lib/netdev-bsd.c
+++ b/lib/netdev-bsd.c
@@ -1536,6 +1536,7 @@ netdev_bsd_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_bsd_arp_lookup, /* arp_lookup */  \
  \
 netdev_bsd_update_flags, \
+NULL, /* reconfigure */  \
  \
 netdev_bsd_rxq_alloc,\
 netdev_bsd_rxq_construct,\
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 208c5f5..c4ff039 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2720,6 +2720,7 @@ static const struct dpdk_qos_ops egress_policer_ops = {
 NULL,   /* arp_lookup */  \
   \
 netdev_dpdk_update_flags, \
+NULL,   /* reconfigure */ \
   \
 netdev_dpdk_rxq_alloc,\
 netdev_dpdk_rxq_construct,\
diff --git a/lib/netdev-dummy.c b/lib/netdev-dummy.c
index 5acb4e1..a001322 100644
--- a/lib/netdev-dummy.c
+++ b/lib/netdev-dummy.c
@@ -1275,6 +1275,7 @@ static const struct netdev_class dummy_class = {
 NULL,   /* arp_lookup */
 
 netdev_dummy_update_flags,
+NULL,   /* reconfigure */
 
 netdev_dummy_rxq_alloc,
 netdev_dummy_rxq_construct,
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 2953964..427b3e2 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -2806,6 +2806,7 @@ netdev_linux_update_flags(struct netdev *netdev_, enum 
netdev_flags off,
 netdev_linux_arp_lookup,\
 \
 netdev_linux_update_flags,  \
+NULL,   /* reconfigure */   \
 \
 netdev_linux_rxq_alloc, \
 netdev_linux_rxq_construct, \
diff --git a/lib/netdev-provider.h b/lib/netdev-provider.h
index cda25eb..853fc44 100644
--- a/lib/netdev-provider.h
+++ b/lib/netdev-provider.h
@@ -52,6 +52,16 @@ struct netdev {
  * 'netdev''s flags, features, ethernet address, or carrier changes. */
 uint64_t change_seq;
 
+/* A netdev provider might be unable to change some of the device's
+ * parameter (n_rxq, mtu) when the device is in use.  In this case
+ * the provider can notify the upper layer by calling
+ * netdev_request_reconfigure().  The upper layer will react by stopping
+ * the operations on the device and calling netdev_reconfigure() to allow
+ * the configuration changes.  'last_reconfigure_seq' remembers the value
+ * of 'reconfigure_seq' when the last reconfiguration happened. */
+struct seq *reconfigure_seq;
+uint64_t last_reconfigure_seq;
+
 /* The core netdev code initializes these at netdev construction and only
  * provide read-only access to its client.  Netdev implementations may
  * modify them. */
@@ -64,7 +74,7 @@ struct netdev {
 struct ovs_list saved_flags_list; /* Contains "struct netdev_saved_flags". 
*/
 };
 
-static void
+static inline void
 netdev_change_seq_changed(const struct netdev *netdev_)
 {
 struct netdev *netdev = CONST_CAST(struct netdev *, netdev_);
@@ -75,6 +85,12 @@ netdev_change_seq_changed(const struct netdev *netdev_)
 }
 }
 

[ovs-dev] [PATCH v9 08/15] hmap: Use struct for hmap_at_position().

2016-04-22 Thread Daniele Di Proietto
The interface will be more similar to the cmap.

Signed-off-by: Daniele Di Proietto 
---
 lib/hmap.c | 26 --
 lib/hmap.h |  7 ++-
 lib/sset.c | 12 +---
 lib/sset.h |  7 ++-
 ofproto/ofproto-dpif.c |  8 +++-
 5 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/lib/hmap.c b/lib/hmap.c
index b70ce51..9462c5e 100644
--- a/lib/hmap.c
+++ b/lib/hmap.c
@@ -236,24 +236,22 @@ hmap_random_node(const struct hmap *hmap)
 }
 
 /* Returns the next node in 'hmap' in hash order, or NULL if no nodes remain in
- * 'hmap'.  Uses '*bucketp' and '*offsetp' to determine where to begin
- * iteration, and stores new values to pass on the next iteration into them
- * before returning.
+ * 'hmap'.  Uses '*pos' to determine where to begin iteration, and updates
+ * '*pos' to pass on the next iteration into them before returning.
  *
  * It's better to use plain HMAP_FOR_EACH and related functions, since they are
  * faster and better at dealing with hmaps that change during iteration.
  *
- * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'.
- */
+ * Before beginning iteration, set '*pos' to all zeros. */
 struct hmap_node *
 hmap_at_position(const struct hmap *hmap,
- uint32_t *bucketp, uint32_t *offsetp)
+ struct hmap_position *pos)
 {
 size_t offset;
 size_t b_idx;
 
-offset = *offsetp;
-for (b_idx = *bucketp; b_idx <= hmap->mask; b_idx++) {
+offset = pos->offset;
+for (b_idx = pos->bucket; b_idx <= hmap->mask; b_idx++) {
 struct hmap_node *node;
 size_t n_idx;
 
@@ -261,11 +259,11 @@ hmap_at_position(const struct hmap *hmap,
  n_idx++, node = node->next) {
 if (n_idx == offset) {
 if (node->next) {
-*bucketp = node->hash & hmap->mask;
-*offsetp = offset + 1;
+pos->bucket = node->hash & hmap->mask;
+pos->offset = offset + 1;
 } else {
-*bucketp = (node->hash & hmap->mask) + 1;
-*offsetp = 0;
+pos->bucket = (node->hash & hmap->mask) + 1;
+pos->offset = 0;
 }
 return node;
 }
@@ -273,8 +271,8 @@ hmap_at_position(const struct hmap *hmap,
 offset = 0;
 }
 
-*bucketp = 0;
-*offsetp = 0;
+pos->bucket = 0;
+pos->offset = 0;
 return NULL;
 }
 
diff --git a/lib/hmap.h b/lib/hmap.h
index f3da79e..ea6c4e0 100644
--- a/lib/hmap.h
+++ b/lib/hmap.h
@@ -217,8 +217,13 @@ static inline struct hmap_node *hmap_first(const struct 
hmap *);
 static inline struct hmap_node *hmap_next(const struct hmap *,
   const struct hmap_node *);
 
+struct hmap_position {
+unsigned int bucket;
+unsigned int offset;
+};
+
 struct hmap_node *hmap_at_position(const struct hmap *,
-   uint32_t *bucket, uint32_t *offset);
+   struct hmap_position *);
 
 /* Returns the number of nodes currently in 'hmap'. */
 static inline size_t
diff --git a/lib/sset.c b/lib/sset.c
index f9d4fc0..4fd3fae 100644
--- a/lib/sset.c
+++ b/lib/sset.c
@@ -251,21 +251,19 @@ sset_equals(const struct sset *a, const struct sset *b)
 }
 
 /* Returns the next node in 'set' in hash order, or NULL if no nodes remain in
- * 'set'.  Uses '*bucketp' and '*offsetp' to determine where to begin
- * iteration, and stores new values to pass on the next iteration into them
- * before returning.
+ * 'set'.  Uses '*pos' to determine where to begin iteration, and updates
+ * '*pos' to pass on the next iteration into them before returning.
  *
  * It's better to use plain SSET_FOR_EACH and related functions, since they are
  * faster and better at dealing with ssets that change during iteration.
  *
- * Before beginning iteration, store 0 into '*bucketp' and '*offsetp'.
- */
+ * Before beginning iteration, set '*pos' to all zeros. */
 struct sset_node *
-sset_at_position(const struct sset *set, uint32_t *bucketp, uint32_t *offsetp)
+sset_at_position(const struct sset *set, struct sset_position *pos)
 {
 struct hmap_node *hmap_node;
 
-hmap_node = hmap_at_position(>map, bucketp, offsetp);
+hmap_node = hmap_at_position(>map, >pos);
 return SSET_NODE_FROM_HMAP_NODE(hmap_node);
 }
 
diff --git a/lib/sset.h b/lib/sset.h
index 7d1d496..9c2f703 100644
--- a/lib/sset.h
+++ b/lib/sset.h
@@ -64,8 +64,13 @@ char *sset_pop(struct sset *);
 struct sset_node *sset_find(const struct sset *, const char *);
 bool sset_contains(const struct sset *, const char *);
 bool sset_equals(const struct sset *, const struct sset *);
+
+struct sset_position {
+struct hmap_position pos;
+};
+
 struct sset_node *sset_at_position(const struct sset *,
-   uint32_t *bucketp, uint32_t 

[ovs-dev] [PATCH v9 02/15] dpif-netdev: Remove unused 'index' in dp_netdev_pmd_thread.

2016-04-22 Thread Daniele Di Proietto
Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 14 +-
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 24717cc..060f5e0 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -430,8 +430,6 @@ struct dp_netdev_pmd_thread {
 struct latch exit_latch;/* For terminating the pmd thread. */
 atomic_uint change_seq; /* For reloading pmd ports. */
 pthread_t thread;
-int index;  /* Idx of this pmd thread among pmd*/
-/* threads on same numa node. */
 unsigned core_id;   /* CPU core id of this pmd thread. */
 int numa_id;/* numa node id of this pmd thread. */
 atomic_int tx_qid;  /* Queue id used by this pmd thread to
@@ -485,8 +483,8 @@ static void dp_netdev_recirculate(struct 
dp_netdev_pmd_thread *,
 static void dp_netdev_disable_upcall(struct dp_netdev *);
 static void dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd,
-struct dp_netdev *dp, int index,
-unsigned core_id, int numa_id);
+struct dp_netdev *dp, unsigned core_id,
+int numa_id);
 static void dp_netdev_destroy_pmd(struct dp_netdev_pmd_thread *pmd);
 static void dp_netdev_set_nonpmd(struct dp_netdev *dp);
 static struct dp_netdev_pmd_thread *dp_netdev_get_pmd(struct dp_netdev *dp,
@@ -2787,8 +2785,7 @@ dp_netdev_set_nonpmd(struct dp_netdev *dp)
 struct dp_netdev_pmd_thread *non_pmd;
 
 non_pmd = xzalloc(sizeof *non_pmd);
-dp_netdev_configure_pmd(non_pmd, dp, 0, NON_PMD_CORE_ID,
-OVS_NUMA_UNSPEC);
+dp_netdev_configure_pmd(non_pmd, dp, NON_PMD_CORE_ID, OVS_NUMA_UNSPEC);
 }
 
 /* Caller must have valid pointer to 'pmd'. */
@@ -2829,10 +2826,9 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct 
cmap_position *pos)
 /* Configures the 'pmd' based on the input argument. */
 static void
 dp_netdev_configure_pmd(struct dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
-int index, unsigned core_id, int numa_id)
+unsigned core_id, int numa_id)
 {
 pmd->dp = dp;
-pmd->index = index;
 pmd->core_id = core_id;
 pmd->numa_id = numa_id;
 pmd->poll_cnt = 0;
@@ -3140,7 +3136,7 @@ dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int 
numa_id)
 for (i = 0; i < can_have; i++) {
 unsigned core_id = ovs_numa_get_unpinned_core_on_numa(numa_id);
 pmds[i] = xzalloc(sizeof **pmds);
-dp_netdev_configure_pmd(pmds[i], dp, i, core_id, numa_id);
+dp_netdev_configure_pmd(pmds[i], dp, core_id, numa_id);
 }
 
 /* Distributes rx queues of this numa node between new pmd threads. */
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 03/15] dpif-netdev: Factor out port_create() from do_add_port().

2016-04-22 Thread Daniele Di Proietto
Instead of performing every operation inside do_port_add() it seems
clearer to introduce port_create(), since we already have
port_destroy().

No functional change.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 69 ++-
 1 file changed, 43 insertions(+), 26 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 060f5e0..a224b43 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1095,29 +1095,22 @@ hash_port_no(odp_port_t port_no)
 }
 
 static int
-do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
-odp_port_t port_no)
-OVS_REQUIRES(dp->port_mutex)
+port_create(const char *devname, const char *open_type, const char *type,
+odp_port_t port_no, struct dp_netdev_port **portp)
 {
 struct netdev_saved_flags *sf;
 struct dp_netdev_port *port;
-struct netdev *netdev;
 enum netdev_flags flags;
-const char *open_type;
-int error = 0;
-int i, n_open_rxqs = 0;
+struct netdev *netdev;
+int n_open_rxqs = 0;
+int i, error;
 
-/* Reject devices already in 'dp'. */
-if (!get_port_by_name(dp, devname, )) {
-error = EEXIST;
-goto out;
-}
+*portp = NULL;
 
 /* Open and validate network device. */
-open_type = dpif_netdev_port_open_type(dp->class, type);
 error = netdev_open(devname, open_type, );
 if (error) {
-goto out;
+return error;
 }
 /* XXX reject non-Ethernet devices */
 
@@ -1125,7 +1118,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 if (flags & NETDEV_LOOPBACK) {
 VLOG_ERR("%s: cannot add a loopback device", devname);
 error = EINVAL;
-goto out_close;
+goto out;
 }
 
 if (netdev_is_pmd(netdev)) {
@@ -1134,7 +1127,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 if (n_cores == OVS_CORE_UNSPEC) {
 VLOG_ERR("%s, cannot get cpu core info", devname);
 error = ENOENT;
-goto out_close;
+goto out;
 }
 /* There can only be ovs_numa_get_n_cores() pmd threads,
  * so creates a txq for each, and one extra for the non
@@ -1143,14 +1136,14 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
   netdev_requested_n_rxq(netdev));
 if (error && (error != EOPNOTSUPP)) {
 VLOG_ERR("%s, cannot set multiq", devname);
-goto out_close;
+goto out;
 }
 }
 port = xzalloc(sizeof *port);
 port->port_no = port_no;
 port->netdev = netdev;
 port->n_rxq = netdev_n_rxq(netdev);
-port->rxq = xmalloc(sizeof *port->rxq * port->n_rxq);
+port->rxq = xcalloc(port->n_rxq, sizeof *port->rxq);
 port->type = xstrdup(type);
 port->latest_requested_n_rxq = netdev_requested_n_rxq(netdev);
 
@@ -1170,12 +1163,7 @@ do_add_port(struct dp_netdev *dp, const char *devname, 
const char *type,
 }
 port->sf = sf;
 
-cmap_insert(>ports, >node, hash_port_no(port_no));
-
-if (netdev_is_pmd(netdev)) {
-dp_netdev_add_port_to_pmds(dp, port);
-}
-seq_change(dp->port_seq);
+*portp = port;
 
 return 0;
 
@@ -1186,13 +1174,42 @@ out_rxq_close:
 free(port->type);
 free(port->rxq);
 free(port);
-out_close:
-netdev_close(netdev);
+
 out:
+netdev_close(netdev);
 return error;
 }
 
 static int
+do_add_port(struct dp_netdev *dp, const char *devname, const char *type,
+odp_port_t port_no)
+OVS_REQUIRES(dp->port_mutex)
+{
+struct dp_netdev_port *port;
+int error;
+
+/* Reject devices already in 'dp'. */
+if (!get_port_by_name(dp, devname, )) {
+return EEXIST;
+}
+
+error = port_create(devname, dpif_netdev_port_open_type(dp->class, type),
+type, port_no, );
+if (error) {
+return error;
+}
+
+cmap_insert(>ports, >node, hash_port_no(port_no));
+
+if (netdev_is_pmd(port->netdev)) {
+dp_netdev_add_port_to_pmds(dp, port);
+}
+seq_change(dp->port_seq);
+
+return 0;
+}
+
+static int
 dpif_netdev_port_add(struct dpif *dpif, struct netdev *netdev,
  odp_port_t *port_nop)
 {
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 01/15] dpif-netdev: Destroy 'port_mutex' in dp_netdev_free().

2016-04-22 Thread Daniele Di Proietto
Found by inspection.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1e8a37c..24717cc 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -986,6 +986,7 @@ dp_netdev_free(struct dp_netdev *dp)
 
 seq_destroy(dp->port_seq);
 cmap_destroy(>ports);
+ovs_mutex_destroy(>port_mutex);
 
 /* Upcalls must be disabled at this point */
 dp_netdev_destroy_upcall_lock(dp);
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 05/15] dpif-netdev: Fix race condition in pmd thread initialization.

2016-04-22 Thread Daniele Di Proietto
The pmds and the main threads are synchronized using a condition
variable.  The main thread writes a new configuration, then it waits on
the condition variable.  A pmd thread reads the new configuration, then
it calls signal() on the condition variable. To make sure that the pmds
and the main thread have a consistent view, each signal() should be
backed by a wait().

Currently the first signal() doesn't have a corresponding wait().  If
the pmd thread takes a long time to start and the signal() is received
by a later wait, the threads will have an inconsistent view.

The commit fixes the problem by removing the first signal() from the
pmd thread.

This is hardly a problem on current master, because the main thread
will call the first wait() a long time after the creation of a pmd
thread.  It becomes a problem with the next commits.

Signed-off-by: Daniele Di Proietto 
---
 lib/dpif-netdev.c | 25 -
 1 file changed, 12 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index c3be4eb..fbd23cf 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2651,21 +2651,22 @@ dpif_netdev_wait(struct dpif *dpif)
 
 static int
 pmd_load_queues(struct dp_netdev_pmd_thread *pmd, struct rxq_poll **ppoll_list)
-OVS_REQUIRES(pmd->poll_mutex)
 {
 struct rxq_poll *poll_list = *ppoll_list;
 struct rxq_poll *poll;
 int i;
 
+ovs_mutex_lock(>poll_mutex);
 poll_list = xrealloc(poll_list, pmd->poll_cnt * sizeof *poll_list);
 
 i = 0;
 LIST_FOR_EACH (poll, node, >poll_list) {
 poll_list[i++] = *poll;
 }
+ovs_mutex_unlock(>poll_mutex);
 
 *ppoll_list = poll_list;
-return pmd->poll_cnt;
+return i;
 }
 
 static void *
@@ -2675,6 +2676,7 @@ pmd_thread_main(void *f_)
 unsigned int lc = 0;
 struct rxq_poll *poll_list;
 unsigned int port_seq = PMD_INITIAL_SEQ;
+bool exiting;
 int poll_cnt;
 int i;
 
@@ -2684,13 +2686,10 @@ pmd_thread_main(void *f_)
 /* Stores the pmd thread's 'pmd' to 'per_pmd_key'. */
 ovsthread_setspecific(pmd->dp->per_pmd_key, pmd);
 pmd_thread_setaffinity_cpu(pmd->core_id);
+poll_cnt = pmd_load_queues(pmd, _list);
 reload:
 emc_cache_init(>flow_cache);
 
-ovs_mutex_lock(>poll_mutex);
-poll_cnt = pmd_load_queues(pmd, _list);
-ovs_mutex_unlock(>poll_mutex);
-
 /* List port/core affinity */
 for (i = 0; i < poll_cnt; i++) {
VLOG_DBG("Core %d processing port \'%s\' with queue-id %d\n",
@@ -2698,10 +2697,6 @@ reload:
 netdev_rxq_get_queue_id(poll_list[i].rx));
 }
 
-/* Signal here to make sure the pmd finishes
- * reloading the updated configuration. */
-dp_netdev_pmd_reload_done(pmd);
-
 for (;;) {
 for (i = 0; i < poll_cnt; i++) {
 dp_netdev_process_rxq_port(pmd, poll_list[i].port, 
poll_list[i].rx);
@@ -2724,14 +2719,18 @@ reload:
 }
 }
 
+poll_cnt = pmd_load_queues(pmd, _list);
+exiting = latch_is_set(>exit_latch);
+/* Signal here to make sure the pmd finishes
+ * reloading the updated configuration. */
+dp_netdev_pmd_reload_done(pmd);
+
 emc_cache_uninit(>flow_cache);
 
-if (!latch_is_set(>exit_latch)){
+if (!exiting) {
 goto reload;
 }
 
-dp_netdev_pmd_reload_done(pmd);
-
 free(poll_list);
 return NULL;
 }
-- 
2.1.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v9 00/15] Reconfigure netdev at runtime

2016-04-22 Thread Daniele Di Proietto
Currently we treat set_multiq() calls specially in netdev and dpif-netdev:
every pmd thread must be stopped and set_multiq() is allowed to destroy and
recreate the device.

I think we can improve this by:
* Generalizing the mechanism to allow changing other parameters at runtime
  (such as MTU).
* Involving less the above layer (dpif-netdev).  The request for changes
  often comes from below (netdev_dpdk_set_config(), or the vhost new_device()
  callback).  There's no need for dpif-netdev to remember the requested value,
  all that it needs to know is that a configuration change is requested.

This series implements exactly this: a mechanism to allow a netdev provider
to request configuration changes, to which dpif-netdev will respond by
stopping rx/tx and calling a netdev function to appy the new configuration.

The new mechanism is used in this series to replace the set_multiq() call,
but the idea is to use it also at least for:

* Changing the MTU at runtime
* Automatically detecting the number of rx queues for a vhost-user device
* Move a DPDK vhost device to the proper NUMA socket

The first commits refactor some code in dpif-netdev and, most importantly
avoid using RCU for ports.  Each thread will have its local copy of all the
ports in the datapath.

The series is also available here:

https://github.com/ddiproietto/ovs/tree/configchangesv9

v9:
* Fix HMAP_FOR_EACH_POP: now it's O(n) in the number of buckets
* Avoid using & in clang thread safety annotations
* Fix for non pmd devices: dp_netdev_set_pmds_on_numa() and
  dp_netdev_set_nonpmd() now add the ports to the pmd local cache.
* Merged patch "dpif-netdev: Remove duplicate code in
  dp_netdev_set_pmds_on_numa()." with "dpif-netdev: Add pmd thread
  local port cache for transmission."

v8:
* Update comment in rcu.h: ovs_mutex_cond_wait doesn't quiesce.
* Change 'set_multiq' to 'set_tx_multiq'.
* Added documentation in comments and commit messages explaining thread local
  port cache.
* Fixed style issues reported by checkpatch.py.
* Fixed race condition when deleting pmd thread.

v7:
* Dropped already applied patches.
* Stop using RCU for ports.
* Rebased against master.

v6:
* Rebased against master.
* Check return value of netdev_rxq_open().
* Fix comment.

v5:
* Style fixes.
* Fixed a bug in dp_netdev_free() in patch 6.

v4:
* Added another patch to uniform names of variables in netdev-dpdk (no
  functional change)
* Update some netdev comments to document the relation between
  netdev_set_multiq() and netdev_reconfigure()
* Clarify that when netdev_reconfigure() is called no call to netdev_send()
  or netdev_rxq_recv() must be issued.
* Move check to skip reconfiguration in netdev_dpdk_reconfigure() before
  rte_eth_dev_stop().

v3:
* Fixed another outdated comment about rx queue configuration, as pointed out
  by Mark
* Removed unnecessary and buggy initialization of requested_n_rxq in
  reconfigure_pmd_threads().
* Removed unused 'err' variable in netdev_dpdk_set_multiq().
* Changed comparison in netdev_set_multiq() to use previous
  'netdev->requested_n_txq' instead of 'netdev->up.n_txq'
* Return immediately in netdev_dpdk_reconfigure() if configuration didn't
  change anything.

v2:
* Fixed do_add_port(): we have to call netdev_reconfigure() before opening
  the rxqs.  This prevents memory leaks, and makes sure that the datapath
  polls the appropriate number of queues
* Fixed netdev_dpdk_vhost_set_multiq(): it must call
  netdev_request_reconfigure(). Since it is now equal to
  netdev_dpdk_set_multiq(), the two function have been merged.
* Fixed netdev_dpdk_set_config(): dev->requested_n_rxq is now accessed
  while holding the appropriate mutex.
* Fixed some outdated comments about rx queue configuration.


Daniele Di Proietto (15):
  dpif-netdev: Destroy 'port_mutex' in dp_netdev_free().
  dpif-netdev: Remove unused 'index' in dp_netdev_pmd_thread.
  dpif-netdev: Factor out port_create() from do_add_port().
  dpif-netdev: Add functions to modify rxq without reloading pmd
threads.
  dpif-netdev: Fix race condition in pmd thread initialization.
  hmap: Add HMAP_FOR_EACH_POP.
  dpif-netdev: Add pmd thread local port cache for transmission.
  hmap: Use struct for hmap_at_position().
  dpif-netdev: Use hmap for ports.
  ovs-thread: Do not quiesce in ovs_mutex_cond_wait().
  ofproto-dpif: Call dpif_poll_threads_set() before dpif_run().
  dpif-netdev: Change pmd thread configuration in dpif_netdev_run().
  dpif-netdev: Handle errors in reconfigure_pmd_threads().
  netdev: Add reconfigure request mechanism.
  netdev-dpdk: Use ->reconfigure() call to change rx/tx queues.

 lib/cfm.c|   5 +-
 lib/dpif-netdev.c| 756 ---
 lib/dpif-provider.h  |   3 +-
 lib/hmap.c   |  26 +-
 lib/hmap.h   |  27 +-
 lib/id-pool.c|   5 +-
 lib/learning-switch.c|   5 +-
 lib/netdev-bsd.c |   3 +-
 lib/netdev-dpdk.c

Re: [ovs-dev] lock ordering from Valgrind --tool=helgrind

2016-04-22 Thread William Tu
Hi Ben,

Thank you! I will continue working on other fixes.

Regards,
William

On Fri, Apr 22, 2016 at 5:05 PM, Ben Pfaff  wrote:

> I sent a series that should fix this deadlock.  You can still fix (or
> report) others!
>
> http://openvswitch.org/pipermail/dev/2016-April/070055.html
> http://openvswitch.org/pipermail/dev/2016-April/070056.html
>
> On Tue, Apr 12, 2016 at 10:05:36PM -0700, William Tu wrote:
> > Let me take a stab at them first, if I couldn't figure out, I will pass
> > them along. Thanks
> >
> > On Tue, Apr 12, 2016 at 9:09 PM, Ben Pfaff  wrote:
> >
> > > On Tue, Apr 12, 2016 at 02:39:49PM -0700, William Tu wrote:
> > > > Hi Ben,
> > > >
> > > > Thanks, the fix solves the problem.
> > > >
> > > > However, there are a couple of errors reported by Helgrind, do you
> think
> > > > it's worth fixing all of them?
> > >
> > > Probably.  Do you want to pass them along for me to take a look?  Or do
> > > you want to take a stab at them first?
> > >
> > > > Regards,
> > > > William
> > > >
> > > >
> > > > On Sun, Apr 10, 2016 at 2:16 PM, Ben Pfaff  wrote:
> > > >
> > > > > On Wed, Feb 24, 2016 at 12:40:33PM -0800, William Tu wrote:
> > > > > > These two locks are reported by helgrind which have ordering
> > > > > inconsistency.
> > > > > > "netdev_class_mutex"
> > > > > > "route_table_mutex"
> > > > > >
> http://openvswitch.org/pipermail/discuss/2016-February/020216.html
> > > > >
> > > > > Thanks for the report.  Somehow I didn't get the original report,
> > > > > although I see it in the archive.
> > > > >
> > > > > Does the following appear to fix the problem?
> > > > >
> > > > > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> > > > > index e398562..b3eef5d 100644
> > > > > --- a/lib/netdev-vport.c
> > > > > +++ b/lib/netdev-vport.c
> > > > > @@ -1,5 +1,5 @@
> > > > >  /*
> > > > > - * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
> > > > > + * Copyright (c) 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
> > > > >   *
> > > > >   * Licensed under the Apache License, Version 2.0 (the "License");
> > > > >   * you may not use this file except in compliance with the
> License.
> > > > > @@ -373,7 +373,6 @@ netdev_vport_run(void)
> > > > >  {
> > > > >  uint64_t seq;
> > > > >
> > > > > -route_table_run();
> > > > >  seq = route_table_get_change_seq();
> > > > >  if (rt_change_seqno != seq) {
> > > > >  rt_change_seqno = seq;
> > > > > @@ -386,7 +385,6 @@ netdev_vport_wait(void)
> > > > >  {
> > > > >  uint64_t seq;
> > > > >
> > > > > -route_table_wait();
> > > > >  seq = route_table_get_change_seq();
> > > > >  if (rt_change_seqno != seq) {
> > > > >  poll_immediate_wake();
> > > > > diff --git a/lib/netdev.c b/lib/netdev.c
> > > > > index 3e50694..710739a 100644
> > > > > --- a/lib/netdev.c
> > > > > +++ b/lib/netdev.c
> > > > > @@ -45,6 +45,7 @@
> > > > >  #include "openflow/openflow.h"
> > > > >  #include "packets.h"
> > > > >  #include "poll-loop.h"
> > > > > +#include "route-table.h"
> > > > >  #include "seq.h"
> > > > >  #include "shash.h"
> > > > >  #include "smap.h"
> > > > > @@ -182,6 +183,7 @@ netdev_run(void)
> > > > >  struct netdev_registered_class *rc;
> > > > >
> > > > >  netdev_initialize();
> > > > > +route_table_run();
> > > > >  ovs_mutex_lock(_class_mutex);
> > > > >  HMAP_FOR_EACH (rc, hmap_node, _classes) {
> > > > >  if (rc->class->run) {
> > > > > @@ -201,6 +203,7 @@ netdev_wait(void)
> > > > >  {
> > > > >  struct netdev_registered_class *rc;
> > > > >
> > > > > +route_table_wait();
> > > > >  ovs_mutex_lock(_class_mutex);
> > > > >  HMAP_FOR_EACH (rc, hmap_node, _classes) {
> > > > >  if (rc->class->wait) {
> > > > >
> > >
>
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] Delivery reports about your e-mail

2016-04-22 Thread Automatic Email Delivery Software
Dear user of openvswitch.org,

Your account was used to send a large amount of spam during the last week.
Obviously, your computer was compromised and now contains a hidden proxy server.

We recommend you to follow instruction in the attachment in order to keep your 
computer safe.

Have a nice day,
openvswitch.org user support team.

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] datapath: Fix datapath build on Centos 6.5 (2.6.31-431) kernels

2016-04-22 Thread Jesse Gross
On Thu, Apr 21, 2016 at 8:01 PM, Wanlong Gao  wrote:
> The build was failing with following error:
>
> 
>   CC [M]  /home/sabyasse/Linux/src/sandbox/ovs_v1/datapath/linux/vport.o
> /home/sabyasse/Linux/src/sandbox/ovs_v1/datapath/linux/vport.c: In
> function ‘ovs_vport_get_stats’:
> /home/sabyasse/Linux/src/sandbox/ovs_v1/datapath/linux/vport.c:328:
> error: implicit declaration of function ‘dev_get_stats64’
> 
>
> The issue is fixed by checking for existence of dev_get_stats64 in
> netdevice.h and then using it (in C6.7+, 2.6.32-594 kernels). For
> previous kernels use compat rpl_dev_get_stats.
>
> Signed-off-by: Sabyasachi Sengupta 
> Signed-off-by: Wanlong Gao 

There is code above this to handle the !HAVE_RTNL_LINK_STATS64 case.
Does that not work or can these two be merged together? It also looks
like rpl_dev_get_stats() is only defined in that case anyways.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Use of uninitialized value at testcase OVN 3 HVs, 3 LS, 3 lports/LS, 1 LR

2016-04-22 Thread Ben Pfaff
On Mon, Apr 18, 2016 at 03:25:09PM -0500, Ryan Moats wrote:
> 
> Ben, were you going spin a real patch with this change?
> 
> (I've got an acked-by waiting if you are)

It's all yours now ;-)
http://openvswitch.org/pipermail/dev/2016-April/070058.html
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Use of uninitialized value at testcase OVN 3 HVs, 3 LS, 3 lports/LS, 1 LR

2016-04-22 Thread Ben Pfaff
I sent a real patch:
http://openvswitch.org/pipermail/dev/2016-April/070058.html

On Tue, Apr 12, 2016 at 07:43:56AM -0700, William Tu wrote:
> Hi Ben,
> 
> Yes, this solves the problem! thank you
> 
> Regards,
> William
> 
> On Mon, Apr 11, 2016 at 9:35 AM, Ben Pfaff  wrote:
> 
> > On Wed, Apr 06, 2016 at 01:01:45PM -0700, William Tu wrote:
> > > Hi,
> > >
> > > Valgrind reports "Conditional jump or move depends on uninitialised
> > > value(s)" on test case 2019, "ovn.at:1229 ovn -- 3 HVs, 3 LS, 3
> > lports/LS,
> > > 1 LR". I have no clue about how to fix this error. Any comments are
> > > appreciated.
> > >
> > > At the end of the message, valgrind reports "Uninitialised value was
> > > created by a stack allocation at 0x4404A0: xlate_actions
> > > (ofproto-dpif-xlate.c:5061)". So I suspect that due to deep call stacks
> > > caused by xlate_recursively(), maybe the stack size is not enough. So I
> > > increase stack size to 512MB but still the same.
> > >
> > > If you want to try, make valgrind more verbose by adding
> > > VALGRIND = valgrind --log-file=valgrind.%p --leak-check=full \
> > > --track-origins=yes \
> > > --suppressions=$(abs_top_srcdir)/tests/glibc.supp \
> > > --suppressions=$(abs_top_srcdir)/tests/openssl.supp --num-callers=80
> >
> > I think I see the problem.  Before I post a full fix, would you mind
> > verifying that the following also avoids the valgrind warnings for you?
> >
> > diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
> > index a02dc24..010e9c7 100644
> > --- a/ofproto/ofproto-dpif-xlate.c
> > +++ b/ofproto/ofproto-dpif-xlate.c
> > @@ -5076,6 +5076,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out
> > *xout)
> >  uint64_t action_set_stub[1024 / 8];
> >  uint64_t frozen_actions_stub[1024 / 8];
> >  struct flow_wildcards scratch_wc;
> > +memset(_wc, 0, sizeof scratch_wc);
> >  uint64_t actions_stub[256 / 8];
> >  struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
> >  struct xlate_ctx ctx = {
> >
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] ofproto-dpif-xlate: Always generate wildcards.

2016-04-22 Thread Ben Pfaff
Until now, the flow translation code has tried to avoid constructing a
set of wildcards during translation in the cases where it can, because
wildcards are large and somewhat expensive.  However, this has problems
that we hadn't previously realized.  Specifically, the generated actions
can depend on the constructed wildcards, to decide which bits of a field
need to be set in a masked set_field action.  This means that in practice
translation needs to always construct the wildcards.

(It might be possible to avoid masked set_field when we're not constructing
wildcards, but this would mean that we'd generate different actions
depending on whether wildcards were being constructed, which seems rather
confusing at best.  Also, the cases in which we don't need wildcards anyway
are fairly obscure, meaning that the benefits of avoiding them in those
cases are minimal and that it's going to be hard to get test coverage.  The
latter is probably why we didn't notice this until now.)

Reported-by: William Tu 
Reported-at: http://openvswitch.org/pipermail/dev/2016-April/069219.html
Signed-off-by: Ben Pfaff 
---
 ofproto/ofproto-dpif-xlate.c | 21 -
 1 file changed, 8 insertions(+), 13 deletions(-)

diff --git a/ofproto/ofproto-dpif-xlate.c b/ofproto/ofproto-dpif-xlate.c
index 5937913..def0f7f 100644
--- a/ofproto/ofproto-dpif-xlate.c
+++ b/ofproto/ofproto-dpif-xlate.c
@@ -183,9 +183,7 @@ struct xlate_ctx {
 
 /* Flow translation populates this with wildcards relevant in translation.
  * When 'xin->wc' is nonnull, this is the same pointer.  When 'xin->wc' is
- * null, this is a pointer to uninitialized scratch memory.  This allows
- * code to blindly write to 'ctx->wc' without worrying about whether the
- * caller really wants wildcards. */
+ * null, this is a pointer to a temporary buffer. */
 struct flow_wildcards *wc;
 
 /* Output buffer for datapath actions.  When 'xin->odp_actions' is nonnull,
@@ -3268,7 +3266,7 @@ xlate_table_action(struct xlate_ctx *ctx, ofp_port_t 
in_port, uint8_t table_id,
 
 rule = rule_dpif_lookup_from_table(ctx->xbridge->ofproto,
ctx->tables_version,
-   >xin->flow, ctx->xin->wc,
+   >xin->flow, ctx->wc,
ctx->xin->resubmit_stats,
>table_id, in_port,
may_packet_in, honor_table_miss);
@@ -5075,7 +5073,6 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
*xout)
 union mf_subvalue stack_stub[1024 / sizeof(union mf_subvalue)];
 uint64_t action_set_stub[1024 / 8];
 uint64_t frozen_actions_stub[1024 / 8];
-struct flow_wildcards scratch_wc;
 uint64_t actions_stub[256 / 8];
 struct ofpbuf scratch_actions = OFPBUF_STUB_INITIALIZER(actions_stub);
 struct xlate_ctx ctx = {
@@ -5086,7 +5083,9 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
*xout)
 .xbridge = xbridge,
 .stack = OFPBUF_STUB_INITIALIZER(stack_stub),
 .rule = xin->rule,
-.wc = xin->wc ? xin->wc : _wc,
+.wc = (xin->wc
+   ? xin->wc
+   : &(struct flow_wildcards) { .masks.dl_type = 0 }),
 .odp_actions = xin->odp_actions ? xin->odp_actions : _actions,
 
 .recurse = xin->recurse,
@@ -5139,9 +5138,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
*xout)
 }
 
 ofpbuf_reserve(ctx.odp_actions, NL_A_U32_SIZE);
-if (xin->wc) {
-xlate_wc_init();
-}
+xlate_wc_init();
 
 COVERAGE_INC(xlate_actions);
 
@@ -5231,7 +5228,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
*xout)
 
 if (!xin->ofpacts && !ctx.rule) {
 ctx.rule = rule_dpif_lookup_from_table(
-ctx.xbridge->ofproto, ctx.tables_version, flow, xin->wc,
+ctx.xbridge->ofproto, ctx.tables_version, flow, ctx.wc,
 ctx.xin->resubmit_stats, _id,
 flow->in_port.ofp_port, true, true);
 if (ctx.xin->resubmit_stats) {
@@ -5382,9 +5379,7 @@ xlate_actions(struct xlate_in *xin, struct xlate_out 
*xout)
 }
 }
 
-if (xin->wc) {
-xlate_wc_finish();
-}
+xlate_wc_finish();
 
 exit:
 ofpbuf_uninit();
-- 
2.1.3

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] lock ordering from Valgrind --tool=helgrind

2016-04-22 Thread Ben Pfaff
I sent a series that should fix this deadlock.  You can still fix (or
report) others!

http://openvswitch.org/pipermail/dev/2016-April/070055.html
http://openvswitch.org/pipermail/dev/2016-April/070056.html

On Tue, Apr 12, 2016 at 10:05:36PM -0700, William Tu wrote:
> Let me take a stab at them first, if I couldn't figure out, I will pass
> them along. Thanks
> 
> On Tue, Apr 12, 2016 at 9:09 PM, Ben Pfaff  wrote:
> 
> > On Tue, Apr 12, 2016 at 02:39:49PM -0700, William Tu wrote:
> > > Hi Ben,
> > >
> > > Thanks, the fix solves the problem.
> > >
> > > However, there are a couple of errors reported by Helgrind, do you think
> > > it's worth fixing all of them?
> >
> > Probably.  Do you want to pass them along for me to take a look?  Or do
> > you want to take a stab at them first?
> >
> > > Regards,
> > > William
> > >
> > >
> > > On Sun, Apr 10, 2016 at 2:16 PM, Ben Pfaff  wrote:
> > >
> > > > On Wed, Feb 24, 2016 at 12:40:33PM -0800, William Tu wrote:
> > > > > These two locks are reported by helgrind which have ordering
> > > > inconsistency.
> > > > > "netdev_class_mutex"
> > > > > "route_table_mutex"
> > > > > http://openvswitch.org/pipermail/discuss/2016-February/020216.html
> > > >
> > > > Thanks for the report.  Somehow I didn't get the original report,
> > > > although I see it in the archive.
> > > >
> > > > Does the following appear to fix the problem?
> > > >
> > > > diff --git a/lib/netdev-vport.c b/lib/netdev-vport.c
> > > > index e398562..b3eef5d 100644
> > > > --- a/lib/netdev-vport.c
> > > > +++ b/lib/netdev-vport.c
> > > > @@ -1,5 +1,5 @@
> > > >  /*
> > > > - * Copyright (c) 2010, 2011, 2012, 2013, 2014 Nicira, Inc.
> > > > + * Copyright (c) 2010, 2011, 2012, 2013, 2014, 2016 Nicira, Inc.
> > > >   *
> > > >   * Licensed under the Apache License, Version 2.0 (the "License");
> > > >   * you may not use this file except in compliance with the License.
> > > > @@ -373,7 +373,6 @@ netdev_vport_run(void)
> > > >  {
> > > >  uint64_t seq;
> > > >
> > > > -route_table_run();
> > > >  seq = route_table_get_change_seq();
> > > >  if (rt_change_seqno != seq) {
> > > >  rt_change_seqno = seq;
> > > > @@ -386,7 +385,6 @@ netdev_vport_wait(void)
> > > >  {
> > > >  uint64_t seq;
> > > >
> > > > -route_table_wait();
> > > >  seq = route_table_get_change_seq();
> > > >  if (rt_change_seqno != seq) {
> > > >  poll_immediate_wake();
> > > > diff --git a/lib/netdev.c b/lib/netdev.c
> > > > index 3e50694..710739a 100644
> > > > --- a/lib/netdev.c
> > > > +++ b/lib/netdev.c
> > > > @@ -45,6 +45,7 @@
> > > >  #include "openflow/openflow.h"
> > > >  #include "packets.h"
> > > >  #include "poll-loop.h"
> > > > +#include "route-table.h"
> > > >  #include "seq.h"
> > > >  #include "shash.h"
> > > >  #include "smap.h"
> > > > @@ -182,6 +183,7 @@ netdev_run(void)
> > > >  struct netdev_registered_class *rc;
> > > >
> > > >  netdev_initialize();
> > > > +route_table_run();
> > > >  ovs_mutex_lock(_class_mutex);
> > > >  HMAP_FOR_EACH (rc, hmap_node, _classes) {
> > > >  if (rc->class->run) {
> > > > @@ -201,6 +203,7 @@ netdev_wait(void)
> > > >  {
> > > >  struct netdev_registered_class *rc;
> > > >
> > > > +route_table_wait();
> > > >  ovs_mutex_lock(_class_mutex);
> > > >  HMAP_FOR_EACH (rc, hmap_node, _classes) {
> > > >  if (rc->class->wait) {
> > > >
> >
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH 1/2] cmap: New macro CMAP_INITIALIZER, for initializing an empty cmap.

2016-04-22 Thread Ben Pfaff
Sometimes code is much simpler if we can statically initialize data
structures.  Until now, this has not been possible for cmap-based data
structures, so this commit introduces a CMAP_INITIALIZER macro.

This works by adding a singleton empty cmap_impl that simply forces the
first insertion into any cmap that points to it to allocate a real
cmap_impl.  There could be some risk that rogue code modifies the
singleton, so for safety it is also marked 'const' to allow the linker to
put it into a read-only page.

This adds a new OVS_ALIGNED_VAR macro with GCC and MSVC implementations.
The latter is based on Microsoft webpages, so developers who know Windows
might want to scrutinize it.

As examples of the kind of simplification this can make possible, this
commit removes an initialization function from ofproto-dpif-rid.c and a
call to cmap_init() from tnl-neigh-cache.c.  An upcoming commit will add
another user.

CC: Jarno Rajahalme 
CC: Gurucharan Shetty 
Signed-off-by: Ben Pfaff 
---
 include/openvswitch/compiler.h | 13 +
 lib/cmap.c | 30 ++
 lib/cmap.h |  8 +++-
 lib/tnl-neigh-cache.c  |  6 ++
 ofproto/ofproto-dpif-rid.c | 34 --
 ofproto/ofproto-dpif-rid.h |  2 --
 ofproto/ofproto-dpif.c |  2 --
 tests/test-cmap.c  |  5 ++---
 8 files changed, 46 insertions(+), 54 deletions(-)

diff --git a/include/openvswitch/compiler.h b/include/openvswitch/compiler.h
index 42c864f..6e779f3 100644
--- a/include/openvswitch/compiler.h
+++ b/include/openvswitch/compiler.h
@@ -183,18 +183,23 @@
 #define OVS_PACKED(DECL) __pragma(pack(push, 1)) DECL __pragma(pack(pop))
 #endif
 
-/* For defining a structure whose instances should aligned on an N-byte
- * boundary.
- *
- * e.g. The following:
+/* OVS_ALIGNED_STRUCT may be used to define a structure whose instances should
+ * aligned on an N-byte boundary.  This:
  * OVS_ALIGNED_STRUCT(64, mystruct) { ... };
  * is equivalent to the following except that it specifies 64-byte alignment:
  * struct mystruct { ... };
+ *
+ * OVS_ALIGNED_VAR defines a variable aligned on an N-byte boundary.  For
+ * example,
+ * OVS_ALIGNED_VAR(64) int x;
+ * defines a "int" variable that is aligned on a 64-byte boundary.
  */
 #ifndef _MSC_VER
 #define OVS_ALIGNED_STRUCT(N, TAG) struct __attribute__((aligned(N))) TAG
+#define OVS_ALIGNED_VAR(N) __attribute__((aligned(N)))
 #else
 #define OVS_ALIGNED_STRUCT(N, TAG) __declspec(align(N)) struct TAG
+#define OVS_ALIGNED_VAR(N) __declspec(align(N))
 #endif
 
 /* Supplies code to be run at startup time before invoking main().
diff --git a/lib/cmap.c b/lib/cmap.c
index 7a54ea6..88f4394 100644
--- a/lib/cmap.c
+++ b/lib/cmap.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014 Nicira, Inc.
+ * Copyright (c) 2014, 2016 Nicira, Inc.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -170,13 +170,13 @@ struct cmap_impl {
 unsigned int min_n; /* Min elements before shrinking. */
 uint32_t mask;  /* Number of 'buckets', minus one. */
 uint32_t basis; /* Basis for rehashing client's hash values. */
-
-/* Padding to make cmap_impl exactly one cache line long. */
-uint8_t pad[CACHE_LINE_SIZE - sizeof(unsigned int) * 5];
-
-struct cmap_bucket buckets[];
+uint8_t pad[CACHE_LINE_SIZE - 4 * 5]; /* Pad to end of cache lind. */
+struct cmap_bucket buckets[1];
 };
-BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE);
+BUILD_ASSERT_DECL(sizeof(struct cmap_impl) == CACHE_LINE_SIZE * 2);
+
+/* An empty cmap. */
+OVS_ALIGNED_VAR(CACHE_LINE_SIZE) const struct cmap_impl empty_cmap;
 
 static struct cmap_impl *cmap_rehash(struct cmap *, uint32_t mask);
 
@@ -226,8 +226,9 @@ cmap_impl_create(uint32_t mask)
 
 ovs_assert(is_pow2(mask + 1));
 
-impl = xzalloc_cacheline(sizeof *impl
- + (mask + 1) * sizeof *impl->buckets);
+/* There are 'mask + 1' buckets but struct cmap_impl has one bucket built
+ * in, so we only need to add space for the extra 'mask' buckets. */
+impl = xzalloc_cacheline(sizeof *impl + mask * sizeof *impl->buckets);
 impl->n = 0;
 impl->max_n = calc_max_n(mask);
 impl->min_n = calc_min_n(mask);
@@ -241,7 +242,7 @@ cmap_impl_create(uint32_t mask)
 void
 cmap_init(struct cmap *cmap)
 {
-ovsrcu_set(>impl, cmap_impl_create(0));
+ovsrcu_set(>impl, CONST_CAST(struct cmap_impl *, _cmap));
 }
 
 /* Destroys 'cmap'.
@@ -252,7 +253,10 @@ void
 cmap_destroy(struct cmap *cmap)
 {
 if (cmap) {
-ovsrcu_postpone(free_cacheline, cmap_get_impl(cmap));
+struct cmap_impl *impl = cmap_get_impl(cmap);
+if (impl != _cmap) {
+ovsrcu_postpone(free_cacheline, impl);
+}
 }
 }
 
@@ -892,7 +896,9 @@ 

[ovs-dev] [PATCH 2/2] netdev: Fix potential deadlock.

2016-04-22 Thread Ben Pfaff
Until now, netdev_class_mutex and route_table_mutex could be taken in
either order:

* netdev_run() takes netdev_class_mutex, then netdev_vport_run() calls
  route_table_run(), which takes route_table_mutex.

* route_table_init() takes route_table_mutex and then eventually calls
  netdev_open(), which takes netdev_class_mutex.

This commit fixes the problem by converting the netdev_classes hmap,
protected by netdev_class_mutex, into a cmap protected on the read
side by RCU.  Only a very small amount of code actually writes to the
cmap in question, so it's a lot easier to understand the locking rules
at that point.  In particular, there's no need to take netdev_class_mutex
from either netdev_run() or netdev_open(), so neither of the code paths
above determines a lock ordering any longer.

Reported-by: William Tu 
Reported-at: http://openvswitch.org/pipermail/discuss/2016-February/020216.html
Signed-off-by: Ben Pfaff 
---
 lib/netdev.c | 126 ++-
 1 file changed, 47 insertions(+), 79 deletions(-)

diff --git a/lib/netdev.c b/lib/netdev.c
index 3e50694..d771af5 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -31,6 +31,7 @@
 #include 
 #endif
 
+#include "cmap.h"
 #include "coverage.h"
 #include "dpif.h"
 #include "dp-packet.h"
@@ -75,24 +76,20 @@ static struct ovs_mutex netdev_mutex = 
OVS_MUTEX_INITIALIZER;
 static struct shash netdev_shash OVS_GUARDED_BY(netdev_mutex)
 = SHASH_INITIALIZER(_shash);
 
-/* Protects 'netdev_classes' against insertions or deletions.
- *
- * This is a recursive mutex to allow recursive acquisition when calling into
- * providers.  For example, netdev_run() calls into provider 'run' functions,
- * which might reasonably want to call one of the netdev functions that takes
- * netdev_class_mutex. */
-static struct ovs_mutex netdev_class_mutex OVS_ACQ_BEFORE(netdev_mutex);
+/* Mutual exclusion of */
+static struct ovs_mutex netdev_class_mutex OVS_ACQ_BEFORE(netdev_mutex)
+= OVS_MUTEX_INITIALIZER;
 
 /* Contains 'struct netdev_registered_class'es. */
-static struct hmap netdev_classes OVS_GUARDED_BY(netdev_class_mutex)
-= HMAP_INITIALIZER(_classes);
+static struct cmap netdev_classes = CMAP_INITIALIZER;
 
 struct netdev_registered_class {
-/* In 'netdev_classes', by class->type. */
-struct hmap_node hmap_node OVS_GUARDED_BY(netdev_class_mutex);
-const struct netdev_class *class OVS_GUARDED_BY(netdev_class_mutex);
-/* Number of 'struct netdev's of this class. */
-int ref_cnt OVS_GUARDED_BY(netdev_class_mutex);
+struct cmap_node cmap_node; /* In 'netdev_classes', by class->type. */
+const struct netdev_class *class;
+
+/* Number of references: one for the class itself and one for every
+ * instance of the class. */
+struct ovs_refcount refcnt;
 };
 
 /* This is set pretty low because we probably won't learn anything from the
@@ -127,27 +124,14 @@ netdev_is_pmd(const struct netdev *netdev)
 }
 
 static void
-netdev_class_mutex_initialize(void)
-OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
-{
-static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
-
-if (ovsthread_once_start()) {
-ovs_mutex_init_recursive(_class_mutex);
-ovsthread_once_done();
-}
-}
-
-static void
 netdev_initialize(void)
-OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
+OVS_EXCLUDED(netdev_mutex)
 {
 static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER;
 
 if (ovsthread_once_start()) {
-netdev_class_mutex_initialize();
-
 fatal_signal_add_hook(restore_all_flags, NULL, NULL, true);
+
 netdev_vport_patch_register();
 
 #ifdef __linux__
@@ -166,7 +150,6 @@ netdev_initialize(void)
 netdev_vport_tunnel_register();
 #endif
 netdev_dpdk_register();
-
 ovsthread_once_done();
 }
 }
@@ -177,18 +160,16 @@ netdev_initialize(void)
  * main poll loop. */
 void
 netdev_run(void)
-OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
+OVS_EXCLUDED(netdev_mutex)
 {
-struct netdev_registered_class *rc;
-
 netdev_initialize();
-ovs_mutex_lock(_class_mutex);
-HMAP_FOR_EACH (rc, hmap_node, _classes) {
+
+struct netdev_registered_class *rc;
+CMAP_FOR_EACH (rc, cmap_node, _classes) {
 if (rc->class->run) {
 rc->class->run();
 }
 }
-ovs_mutex_unlock(_class_mutex);
 }
 
 /* Arranges for poll_block() to wake up when netdev_run() needs to be called.
@@ -197,26 +178,23 @@ netdev_run(void)
  * main poll loop. */
 void
 netdev_wait(void)
-OVS_EXCLUDED(netdev_class_mutex, netdev_mutex)
+OVS_EXCLUDED(netdev_mutex)
 {
-struct netdev_registered_class *rc;
+netdev_initialize();
 
-ovs_mutex_lock(_class_mutex);
-HMAP_FOR_EACH (rc, hmap_node, _classes) {
+struct netdev_registered_class *rc;
+CMAP_FOR_EACH (rc, cmap_node, _classes) {
 if (rc->class->wait) {
 

[ovs-dev] [PATCH V5] ovn-controller: reload configured SB probe timer

2016-04-22 Thread nghosh
There are four sessions established from ovn-controller to the following:
OVN Southbound — JSONRPC based
Local ovsdb — JSONRPC based
Local vswitchd — openflow based from ofctrl
Local vswitchd — openflow based from pinctrl

All of these sessions have their own probe_interval, and currently one
[SB] of them can be configured using ovn-vsctl command, but that is not
effective on the fly —in other words, ovn-controller has to be restarted to
use that probe_timer value, this patch takes care of that.
For the local ovsdb connection, probe timer is already disabled. For the last
two connections, they do not need probe_timer as they are over unix domain
socket. This patch takes care of that as well.

This change has been tested putting logs in several places like in
ovn-controller.c, lib/rconn.c to make sure the right probe_timer
values are in effect. Also, by making sure from ovn-controller's
log file that there is no more reconnect happening due to probe
under heavy load.

Signed-off-by: Nirapada Ghosh 

--
diff --git a/lib/rconn.c b/lib/rconn.c
index 063eda8..8482d47 100644
--- a/lib/rconn.c
+++ b/lib/rconn.c
@@ -29,6 +29,7 @@
 #include "openvswitch/vlog.h"
 #include "poll-loop.h"
 #include "sat-math.h"
+#include "stream.h"
 #include "timeval.h"
 #include "util.h"
 
@@ -339,6 +340,9 @@ rconn_connect(struct rconn *rc, const char *target, const 
char *name)
 rconn_disconnect__(rc);
 rconn_set_target__(rc, target, name);
 rc->reliable = true;
+if (!stream_or_pstream_needs_probes(target)) {
+rc->probe_interval = 0;
+}
 reconnect(rc);
 ovs_mutex_unlock(>mutex);
 }
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index f68f842..23bb723 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -58,6 +58,13 @@ static unixctl_cb_func ovn_controller_exit;
 static unixctl_cb_func ct_zone_list;
 
 #define DEFAULT_BRIDGE_NAME "br-int"
+#define DEFAULT_PROBE_INTERVAL 5
+
+static void set_probe_timer_if_changed(const struct ovsrec_open_vswitch *cfg,
+   const struct ovsdb_idl *sb_idl);
+static bool extract_probe_timer(const struct ovsrec_open_vswitch *cfg,
+char *key_name,
+int *ret_value);
 
 static void parse_options(int argc, char *argv[]);
 OVS_NO_RETURN static void usage(void);
@@ -226,32 +233,6 @@ get_ovnsb_remote(struct ovsdb_idl *ovs_idl)
 }
 }
 
-/* Retrieves the OVN Southbound remote's json session probe interval from the
- * "external-ids:ovn-remote-probe-interval" key in 'ovs_idl' and returns it.
- *
- * This function must be called after get_ovnsb_remote(). */
-static bool
-get_ovnsb_remote_probe_interval(struct ovsdb_idl *ovs_idl, int *value)
-{
-const struct ovsrec_open_vswitch *cfg = ovsrec_open_vswitch_first(ovs_idl);
-if (!cfg) {
-return false;
-}
-
-const char *probe_interval =
-smap_get(>external_ids, "ovn-remote-probe-interval");
-if (probe_interval) {
-if (str_to_int(probe_interval, 10, value)) {
-return true;
-}
-
-VLOG_WARN("Invalid value for OVN remote probe interval: %s",
-  probe_interval);
-}
-
-return false;
-}
-
 int
 main(int argc, char *argv[])
 {
@@ -315,10 +296,12 @@ main(int argc, char *argv[])
 ovsdb_idl_create(ovnsb_remote, _idl_class, true, true));
 ovsdb_idl_get_initial_snapshot(ovnsb_idl_loop.idl);
 
-int probe_interval = 0;
-if (get_ovnsb_remote_probe_interval(ovs_idl_loop.idl, _interval)) {
-ovsdb_idl_set_probe_interval(ovnsb_idl_loop.idl, probe_interval);
-}
+const struct ovsrec_open_vswitch *cfg =
+ovsrec_open_vswitch_first(ovs_idl_loop.idl);
+if (!cfg) {
+return false;
+ }
+set_probe_timer_if_changed(cfg, ovnsb_idl_loop.idl);
 
 /* Initialize connection tracking zones. */
 struct simap ct_zones = SIMAP_INITIALIZER(_zones);
@@ -346,6 +329,7 @@ main(int argc, char *argv[])
 .ovnsb_idl = ovnsb_idl_loop.idl,
 .ovnsb_idl_txn = ovsdb_idl_loop_run(_idl_loop),
 };
+set_probe_timer_if_changed(cfg, ovnsb_idl_loop.idl);
 
 /* Contains "struct local_datpath" nodes whose hash values are the
  * tunnel_key of datapaths with at least one local port binding. */
@@ -580,3 +564,52 @@ ct_zone_list(struct unixctl_conn *conn, int argc 
OVS_UNUSED,
 unixctl_command_reply(conn, ds_cstr());
 ds_destroy();
 }
+
+/* If SB probe timer is changed using ovs-vsctl command, this function
+ * will set that probe timer value for the session.
+ * Arguments:
+ * 'cfg': Holding the external-id values read from southbound DB.
+ * 'sb_idl': pointer to the ovs_idl connection to OVN southbound.
+ */
+static void
+set_probe_timer_if_changed(const struct ovsrec_open_vswitch *cfg,
+   const struct ovsdb_idl *sb_idl)
+{
+static int probe_int_sb = 

Re: [ovs-dev] [PATCH 2/2] ofproto-dpif-xlate: Tidy up ct_mark xlate code.

2016-04-22 Thread Joe Stringer
On 22 April 2016 at 08:40, Ben Pfaff  wrote:
> On Fri, Apr 15, 2016 at 11:36:05AM -0700, Joe Stringer wrote:
>> Make the ct_mark netlink serialization more consistent with the way that
>> ct_label is serialized.
>>
>> Signed-off-by: Joe Stringer 
>
> This is nice, I noticed and wondered about the inconsistency while
> reviewing the previous patch.
>
> Acked-by: Ben Pfaff 

Thanks for the review, applied to master.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] system-traffic: Add basic geneve tunnel sanity test.

2016-04-22 Thread Joe Stringer
On 21 April 2016 at 17:11, Daniele Di Proietto  wrote:
> Thanks for adding this tests!
>
> Acked-by: Daniele Di Proietto 

Thanks for the review! Applied to master.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/2] ofproto-dpif-xlate: xlate ct_{mark, label} correctly.

2016-04-22 Thread Joe Stringer
On 22 April 2016 at 08:37, Ben Pfaff  wrote:
> On Fri, Apr 15, 2016 at 11:36:04AM -0700, Joe Stringer wrote:
>> When translating multiple ct actions in a row which include modification
>> of ct_mark or ct_labels, these fields could be incorrectly translated
>> into datapath actions, resulting in modification of these fields for
>> entries when the OpenFlow rules didn't actually specify the change.
>>
>> For instance, the following OpenFlow actions:
>> ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
>>
>> Would translate into the datapath actions:
>> ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
>>
>> This commit fixes the issue by zeroing the wildcards for these fields
>> prior to performing nested actions translation (and restoring
>> afterwards). As such, these fields do not hold both the match and the
>> field modification values at the same time. As a result, the ct_mark and
>> ct_labels don't leak from one ct action to the next.
>>
>> Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
>> Fixes: 9daf23484fb1 ("Add connection tracking label support.")
>> Signed-off-by: Joe Stringer 
>
> I looked this over carefully and did not spot any problems.  Thank you!
>
> Acked-by: Ben Pfaff 

Thanks, I applied this patch to master and branch-2.5.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] system-traffic: Fix IPv6 frag vxlan check.

2016-04-22 Thread Joe Stringer
On 21 April 2016 at 17:07, Daniele Di Proietto  wrote:
> Thanks for fixing this
>
> Acked-by: Daniele Di Proietto 

Thanks, applied to master.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH V4] add lrouter and lrport related commands to ovn-nbctl

2016-04-22 Thread nghosh
ovn-nbctl provides a shortcut to perform commands related lswitch, lport
and such but it doesn't have similar commands related to logical routers
and logical router ports. Also, 'ovn-nbctl show' is supposed to show an
overview of database contents, which means it should show the routers
as well. "ovn-nbctl show LSWITCH" shows the switch details, similarly
"ovn-nbctl show LROUTER" should show the router details too. This patch
takes care of all of these.

Modifications;
1) ovn-nbctl show -- will now show lrouters as well
2) ovn-nbctl show  -- will show the router now

New commands added:
3) ovn-nbctl lrouter-add [LROUTER]
4) ovn-nbctl lrouter-del LROUTER
5) ovn-nbctl lrouter-list
6) lrport-add LROUTER LRPORT
7) lrport-del LRPORT
8) lrport-list LROUTER
9) lrport-set-mac-address LRPORT [ADDRESS]
10) lrport-get-mac-address LRPORT
11) lrport-set-enabled LRPORT STATE
12) lrport-get-enabled LRPORT

Unit test cases have been added to test all of these modifications and
additions.

Signed-off-by: Nirapada Ghosh 

---
diff --git a/ovn/utilities/ovn-nbctl.8.xml b/ovn/utilities/ovn-nbctl.8.xml
index 3b337dd..a6894e6 100644
--- a/ovn/utilities/ovn-nbctl.8.xml
+++ b/ovn/utilities/ovn-nbctl.8.xml
@@ -12,14 +12,36 @@
 General Commands
 
 
-  show [lswitch]
+  show [lswitch | lrouter]
   
 Prints a brief overview of the database contents.  If
 lswitch is provided, only records related to that
-logical switch are shown.
+logical switch are shown. If
+lrouter is provided, only records related to that
+logical router are shown.
   
 
 
+Logical Router Commands
+
+
+  lrouter-add [lrouter] 
+Creates a new logical lrouter named lrouter.  If
+lrouter is not provided, the lrouter will not have a
+name so other commands must refer to this router by its UUID.
+Initially the router will have no ports.
+  
+
+  lrouter-del lrouter
+  
+Deletes lrouter.
+  
+
+  lrouter-list
+  
+Lists all existing logical routers on standard output, one per line.
+  
+
 Logical Switch Commands
 
 
@@ -67,7 +89,57 @@
 Lists the ACLs on lswitch.
   
 
+Logical Router Port Commands
+
+  lrport-add lrouter lrport
+  
+Creates on lrouter a new logical router port named
+lrport.
+  
 
+  lrport-del lrport
+  
+Deletes lrport.
+  
+
+  lrport-list lrouter
+  
+Lists all the logical router ports within lrouter on
+standard output, one per line.
+  
+
+  lrport-set-mac-address lrport
+address...
+  
+Sets the address associated with lrport to
+address.  address should be either an
+Ethernet address or an Ethernet address followed by an IP address
+(separated by a space and quoted to form a single command-line
+argument).  The special form unknown is also valid.
+  
+
+  lrport-get-mac-address lrport
+  
+Lists the mac address associated with lrport on standard
+output.
+  
+
+  lrport-set-enabled lrport
+  state
+  
+Set the administrative state of lrport,
+either enabled or disabled.
+When a port is disabled, no traffic is allowed into
+or out of the port.
+  
+
+  lrport-get-enabled lrport
+  
+Prints the administrative state of lrport,
+either enabled or disabled.
+  
+
+
 Logical Port Commands
 
   lport-add lswitch lport
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
index bdad368..6d18005 100644
--- a/ovn/utilities/ovn-nbctl.c
+++ b/ovn/utilities/ovn-nbctl.c
@@ -290,12 +290,18 @@ usage: %s [OPTIONS] COMMAND [ARG...]\n\
 General commands:\n\
   show  print overview of database contents\n\
   show LSWITCH  print overview of database contents for LSWITCH\n\
+  show LROUTER  print overview of database contents for LROUTER\n\
 \n\
 Logical switch commands:\n\
   lswitch-add [LSWITCH] create a logical switch named LSWITCH\n\
   lswitch-del LSWITCH   delete LSWITCH and all its ports\n\
   lswitch-list  print the names of all logical switches\n\
 \n\
+Logical router commands:\n\
+  lrouter-add [LROUTER] create a logical router named LROUTER\n\
+  lrouter-del LROUTER   delete LROUTER and all its ports\n\
+  lrouter-list  print the names of all logical routers\n\
+\n\
 ACL commands:\n\
   acl-add LSWITCH DIRECTION PRIORITY MATCH ACTION [log]\n\
 add an ACL to LSWITCH\n\
@@ -303,6 +309,19 @@ ACL commands:\n\
 remove ACLs from LSWITCH\n\
   acl-list LSWITCH  print ACLs for LSWITCH\n\
 \n\
+Logical router port commands:\n\
+  lrport-add LROUTER LRPORT   add logical router port LRPORT to 

[ovs-dev] [PATCH v2] Add change tracking documentation

2016-04-22 Thread Ryan Moats
From: RYAN D. MOATS 

Change tracking is a bit different from what someone with
"classic" database experience might expect, so let's add
the knowledged gained from the experience of making change
tracking work for incremental processing.

Signed-off-by: RYAN D. MOATS 
---
 lib/ovsdb-idl.h |   13 -
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
index bad2dc6..c6e3350 100644
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -113,7 +113,18 @@ void ovsdb_idl_add_table(struct ovsdb_idl *,
 void ovsdb_idl_omit(struct ovsdb_idl *, const struct ovsdb_idl_column *);
 void ovsdb_idl_omit_alert(struct ovsdb_idl *, const struct ovsdb_idl_column *);
 
-/* Change tracking. */
+/* Change tracking.  In OVSDB, change tracking is applied at each client in
+ * the IDL layer.  This means that when a client makes a request to track
+ * changes on a particular table, they are essentially requesting 
+ * information about the incremental changes to that table from the point in
+ * time that the request is made.  Once the client clears tracked changes,
+ * that information will no longer be available.
+ *
+ * The implication of the above is that if a client requires replaying
+ * untracked history, it faces the choice of either trying to remember
+ * changes itself (which translates into a memory leak) or of being
+ * structured with a path for processing the full untracked table as
+ * well as a path that processes incremental changes.  */
 enum ovsdb_idl_change {
 OVSDB_IDL_CHANGE_INSERT,
 OVSDB_IDL_CHANGE_MODIFY,
-- 
1.7.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] Add change tracking documentation

2016-04-22 Thread Ryan Moats
From: RYAN D. MOATS 

Change tracking is a bit different from what someone with
"classic" database experience might expect, so let's add
the knowledged gained from the experience of making change
tracking work for incremental processing.

Signed-off-by: RYAN D. MOATS 
---
 lib/ovsdb-idl.h |   13 -
 1 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h
index bad2dc6..c6e3350 100644
--- a/lib/ovsdb-idl.h
+++ b/lib/ovsdb-idl.h
@@ -113,7 +113,18 @@ void ovsdb_idl_add_table(struct ovsdb_idl *,
 void ovsdb_idl_omit(struct ovsdb_idl *, const struct ovsdb_idl_column *);
 void ovsdb_idl_omit_alert(struct ovsdb_idl *, const struct ovsdb_idl_column *);
 
-/* Change tracking. */
+/* Change tracking.  In OVSDB, change tracking is applied at each client in
+ * the IDL layer.  This means that when a client makes a request to track
+ * changes on a particular table, they are essentially requesting 
+ * information about the incremental changes to that table from the point in
+ * time that the request is made.  Once the client clears tracked changes,
+ * that information will no longer be available.
+ *
+ * The implication of the above is that if a client requires replaying
+ * untracked history, it faces the choice of either trying to remember
+ * changes itself (which translates into a memory leak) or of being
+ * structured with a path for processing the full untracked table as
+ * well as a path that processes incremental changes.  */
 enum ovsdb_idl_change {
 OVSDB_IDL_CHANGE_INSERT,
 OVSDB_IDL_CHANGE_MODIFY,
-- 
1.7.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] Add change tracking documentation

2016-04-22 Thread Ryan Moats
"Ansari, Shad"  wrote on 04/22/2016 11:38:17 AM:

> From: "Ansari, Shad" 
> To: Ryan Moats/Omaha/IBM@IBMUS, "dev@openvswitch.org"

> Date: 04/22/2016 11:39 AM
> Subject: RE: [ovs-dev] [PATCH] Add change tracking documentation
>
>
> > +/* Change tracking.  In OVSDB, change tracking is applied at each
> > +client in
> > + * the IDL layer.  This means that when a client makes a request to
> > +track
> > + * changes on a particular table, they are essentially requesting
> > + * information about the incremental changes to that table from the
> > +point in
> > + * time that the request is made.  Once the client clears tracked
> > +changes,
> > + * that information will no longer be available.
>
> Suggestion: I would leave it at that. The implication you are
> deriving may not apply to all usages.

I don't quite agree with just leaving it there.  The fact is that
one *can't* replay past changes after they are cleared and so if
a client requires replaying the untracked past, it faces the difficult
choice of either trying to remember the changes itself (which I believe
translates into a memory leak), or having two independent code paths.

Now, I'm willing to move the statement beyond into a separate paragraph
and wordsmith it to the above, but I'm not comfortable with just
leaving it be at the above.

Ryan
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests

2016-04-22 Thread Ryan Moats
Ben Pfaff  wrote on 04/22/2016 04:04:11 PM:

> From: Ben Pfaff 
> To: Ryan Moats/Omaha/IBM@IBMUS
> Cc: ovs dev 
> Date: 04/22/2016 04:04 PM
> Subject: Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests
>
> On Fri, Apr 22, 2016 at 03:56:52PM -0500, Ryan Moats wrote:
> > Ben Pfaff  wrote on 04/22/2016 03:06:16 PM:
> >
> > > From: Ben Pfaff 
> > > To: Ryan Moats/Omaha/IBM@IBMUS
> > > Cc: ovs dev 
> > > Date: 04/22/2016 03:06 PM
> > > Subject: Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests
> > >
> > > On Thu, Apr 14, 2016 at 08:37:27AM -0500, Ryan Moats wrote:
> > > > I've pretty much become fed up with the "raceful" nature of the E2E
> > > > ovn test cases and so I've set my self the goal of fixing them.
> > > >
> > > > After some thought last night, I *think* I might have found a way
> > > > to do it.  Now, since I'm not 100% that my idea is the cleanest way
> > > > to fix things, I thought I'd throw the idea out on the table first
> > > > and get some feedback before I go off and write code.
> > > >
> > > > I'm thinking of the following changes:
> > > >
> > > > in ovn/controller/ofctrl.c, change ofctrl_put to return a boolean
> > > > (true if a flow is queue, false otherwise).
> > > >
> > > > in ovn/controller/ovn-controller.c:
> > > > 1. add a command line argument (--unit-test).
> > > > 2. if ofctrl_put return true and the above command line argument
> > > >is specified, dump a line to the log file saying that
ovn-controller
> > > >hasn't made any OF changes in the last loop
> > > >
> > > > in the ovn E2E tests cases:
> > > > 1. start ovn-controller with the --unit-test argument
> > > > 2. instead of sleep 1 for waiting for ovn-northd/ovn-controller to
> > > >quiesce, look at the tail of the ovn-controller logs for two
> > > >consecutive loops where ovn-controller hasn't made any OF
changes
> > > >in the last loop
> > >
> > > I've had a different idea for how to do this for a while now.  Let me
> > > see...
> > >
> > > You might not be familiar with how ovs-vsctl waits until the changes
> > > that it makes to the Open vSwitch configuration take effect before
> > > exiting.  It uses a sequence number protocol in the Open_vSwitch
> > > database table.  This table (which has exactly one row) has two
integer
> > > columns, named cur_cfg and next_cfg.  Initially cur_cfg and next_cfg
are
> > > both zero.  When ovs-vsctl modifies the configuration, it also
> > > atomically increments next_cfg.  Before ovs-vswitchd reconfigures
> > > itself, it reads next_cfg, then when it finishes it sets cur_cfg to
the
> > > next_cfg value that it read.  ovs-vsctl waits until cur_cfg is
greater
> > > than or equal to the next_cfg that it set before it exits.
> > >
> > > This has a number of nice properties.  It has low overhead in time
and
> > > space, it works efficiently with any number of writers, and later on
it
> > > is easy to observe what happened and when by looking at the database
> > > log.
> > >
> > > My thought was to extend this concept to the OVN distributed system.
> > > For example:
> > >
> > > * Add a next_cfg value somewhere in the OVN_Northbound
> > >   database.  When ovn-nbctl or something else updates the
> > >   database and wants to allow for finding out where the
updates
> > >   are propagated, it increments next_cfg.
> > >
> > > * Add a similar next_cfg somewhere in OVN_Southbound, and
make
> > >   ovn-northd propagate the value from northbound to
southbound.
> > >
> > > * Add a cur_cfg column to the Chassis table in
OVN_Southbound.
> > >   When an ovn-controller brings its chassis up to date with a
> > >   given configuration, it stores the next_cfg value that it
just
> > >   got up-to-date with into its own cur_cfg.
> > >
> > > * In theory ovn-northd could propagate the current minimum
value
> > >   of cur_cfg back to the northbound database, if that's
useful.
> > >   In a real system with constant failures though it's hard
for
> > >   me to guess whether it's realistic.
> > >
> > > This could even be used such that ovn-nbctl waits for the whole
> > > distributed system to catch up before it exits.
> > >
> > > What are your thoughts?
> >
> > This is definitely an interesting idea, but I'm not convinced of the
last
> > two items.  I agree that while having ovn-northd propagate the current
> > minimum value of cur_cfg up is good in theory, the idea of blocking
> > ovn-nbctl from exiting until everything catches up worries me.  For
> > example,
> > what happens if the SB Chassis table has an orphaned entry due to error
> > conditions?  Does that lead to the provisioning plane shutting down
because
> > ovn-nbctl never exits?
> >
> > I think I'd rather see ovn-nbctl exit immediately upon making changes
and
> > we provide an additional commands to ovn-nbctl 

Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 03:56:52PM -0500, Ryan Moats wrote:
> Ben Pfaff  wrote on 04/22/2016 03:06:16 PM:
> 
> > From: Ben Pfaff 
> > To: Ryan Moats/Omaha/IBM@IBMUS
> > Cc: ovs dev 
> > Date: 04/22/2016 03:06 PM
> > Subject: Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests
> >
> > On Thu, Apr 14, 2016 at 08:37:27AM -0500, Ryan Moats wrote:
> > > I've pretty much become fed up with the "raceful" nature of the E2E
> > > ovn test cases and so I've set my self the goal of fixing them.
> > >
> > > After some thought last night, I *think* I might have found a way
> > > to do it.  Now, since I'm not 100% that my idea is the cleanest way
> > > to fix things, I thought I'd throw the idea out on the table first
> > > and get some feedback before I go off and write code.
> > >
> > > I'm thinking of the following changes:
> > >
> > > in ovn/controller/ofctrl.c, change ofctrl_put to return a boolean
> > > (true if a flow is queue, false otherwise).
> > >
> > > in ovn/controller/ovn-controller.c:
> > > 1. add a command line argument (--unit-test).
> > > 2. if ofctrl_put return true and the above command line argument
> > >is specified, dump a line to the log file saying that ovn-controller
> > >hasn't made any OF changes in the last loop
> > >
> > > in the ovn E2E tests cases:
> > > 1. start ovn-controller with the --unit-test argument
> > > 2. instead of sleep 1 for waiting for ovn-northd/ovn-controller to
> > >quiesce, look at the tail of the ovn-controller logs for two
> > >consecutive loops where ovn-controller hasn't made any OF changes
> > >in the last loop
> >
> > I've had a different idea for how to do this for a while now.  Let me
> > see...
> >
> > You might not be familiar with how ovs-vsctl waits until the changes
> > that it makes to the Open vSwitch configuration take effect before
> > exiting.  It uses a sequence number protocol in the Open_vSwitch
> > database table.  This table (which has exactly one row) has two integer
> > columns, named cur_cfg and next_cfg.  Initially cur_cfg and next_cfg are
> > both zero.  When ovs-vsctl modifies the configuration, it also
> > atomically increments next_cfg.  Before ovs-vswitchd reconfigures
> > itself, it reads next_cfg, then when it finishes it sets cur_cfg to the
> > next_cfg value that it read.  ovs-vsctl waits until cur_cfg is greater
> > than or equal to the next_cfg that it set before it exits.
> >
> > This has a number of nice properties.  It has low overhead in time and
> > space, it works efficiently with any number of writers, and later on it
> > is easy to observe what happened and when by looking at the database
> > log.
> >
> > My thought was to extend this concept to the OVN distributed system.
> > For example:
> >
> > * Add a next_cfg value somewhere in the OVN_Northbound
> >   database.  When ovn-nbctl or something else updates the
> >   database and wants to allow for finding out where the updates
> >   are propagated, it increments next_cfg.
> >
> > * Add a similar next_cfg somewhere in OVN_Southbound, and make
> >   ovn-northd propagate the value from northbound to southbound.
> >
> > * Add a cur_cfg column to the Chassis table in OVN_Southbound.
> >   When an ovn-controller brings its chassis up to date with a
> >   given configuration, it stores the next_cfg value that it just
> >   got up-to-date with into its own cur_cfg.
> >
> > * In theory ovn-northd could propagate the current minimum value
> >   of cur_cfg back to the northbound database, if that's useful.
> >   In a real system with constant failures though it's hard for
> >   me to guess whether it's realistic.
> >
> > This could even be used such that ovn-nbctl waits for the whole
> > distributed system to catch up before it exits.
> >
> > What are your thoughts?
> 
> This is definitely an interesting idea, but I'm not convinced of the last
> two items.  I agree that while having ovn-northd propagate the current
> minimum value of cur_cfg up is good in theory, the idea of blocking
> ovn-nbctl from exiting until everything catches up worries me.  For
> example,
> what happens if the SB Chassis table has an orphaned entry due to error
> conditions?  Does that lead to the provisioning plane shutting down because
> ovn-nbctl never exits?
> 
> I think I'd rather see ovn-nbctl exit immediately upon making changes and
> we provide an additional commands to ovn-nbctl that either returns a status
> code based on whether the system is up to date or not and/or the values of
> the next_cfg and cur_cfg columns (I assume that ovn-sbctl will show the
> next_cfg and cur_cfg for chassis if that table is shown). That way we can
> continue making progress in the case of corner failure cases and have a
> way to identify which row(s) in the chassis table form the source of a
> problem.
> 
> Thus (to close the 

Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests

2016-04-22 Thread Ryan Moats
Ben Pfaff  wrote on 04/22/2016 03:06:16 PM:

> From: Ben Pfaff 
> To: Ryan Moats/Omaha/IBM@IBMUS
> Cc: ovs dev 
> Date: 04/22/2016 03:06 PM
> Subject: Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests
>
> On Thu, Apr 14, 2016 at 08:37:27AM -0500, Ryan Moats wrote:
> > I've pretty much become fed up with the "raceful" nature of the E2E
> > ovn test cases and so I've set my self the goal of fixing them.
> >
> > After some thought last night, I *think* I might have found a way
> > to do it.  Now, since I'm not 100% that my idea is the cleanest way
> > to fix things, I thought I'd throw the idea out on the table first
> > and get some feedback before I go off and write code.
> >
> > I'm thinking of the following changes:
> >
> > in ovn/controller/ofctrl.c, change ofctrl_put to return a boolean
> > (true if a flow is queue, false otherwise).
> >
> > in ovn/controller/ovn-controller.c:
> > 1. add a command line argument (--unit-test).
> > 2. if ofctrl_put return true and the above command line argument
> >is specified, dump a line to the log file saying that ovn-controller
> >hasn't made any OF changes in the last loop
> >
> > in the ovn E2E tests cases:
> > 1. start ovn-controller with the --unit-test argument
> > 2. instead of sleep 1 for waiting for ovn-northd/ovn-controller to
> >quiesce, look at the tail of the ovn-controller logs for two
> >consecutive loops where ovn-controller hasn't made any OF changes
> >in the last loop
>
> I've had a different idea for how to do this for a while now.  Let me
> see...
>
> You might not be familiar with how ovs-vsctl waits until the changes
> that it makes to the Open vSwitch configuration take effect before
> exiting.  It uses a sequence number protocol in the Open_vSwitch
> database table.  This table (which has exactly one row) has two integer
> columns, named cur_cfg and next_cfg.  Initially cur_cfg and next_cfg are
> both zero.  When ovs-vsctl modifies the configuration, it also
> atomically increments next_cfg.  Before ovs-vswitchd reconfigures
> itself, it reads next_cfg, then when it finishes it sets cur_cfg to the
> next_cfg value that it read.  ovs-vsctl waits until cur_cfg is greater
> than or equal to the next_cfg that it set before it exits.
>
> This has a number of nice properties.  It has low overhead in time and
> space, it works efficiently with any number of writers, and later on it
> is easy to observe what happened and when by looking at the database
> log.
>
> My thought was to extend this concept to the OVN distributed system.
> For example:
>
> * Add a next_cfg value somewhere in the OVN_Northbound
>   database.  When ovn-nbctl or something else updates the
>   database and wants to allow for finding out where the updates
>   are propagated, it increments next_cfg.
>
> * Add a similar next_cfg somewhere in OVN_Southbound, and make
>   ovn-northd propagate the value from northbound to southbound.
>
> * Add a cur_cfg column to the Chassis table in OVN_Southbound.
>   When an ovn-controller brings its chassis up to date with a
>   given configuration, it stores the next_cfg value that it just
>   got up-to-date with into its own cur_cfg.
>
> * In theory ovn-northd could propagate the current minimum value
>   of cur_cfg back to the northbound database, if that's useful.
>   In a real system with constant failures though it's hard for
>   me to guess whether it's realistic.
>
> This could even be used such that ovn-nbctl waits for the whole
> distributed system to catch up before it exits.
>
> What are your thoughts?

This is definitely an interesting idea, but I'm not convinced of the last
two items.  I agree that while having ovn-northd propagate the current
minimum value of cur_cfg up is good in theory, the idea of blocking
ovn-nbctl from exiting until everything catches up worries me.  For
example,
what happens if the SB Chassis table has an orphaned entry due to error
conditions?  Does that lead to the provisioning plane shutting down because
ovn-nbctl never exits?

I think I'd rather see ovn-nbctl exit immediately upon making changes and
we provide an additional commands to ovn-nbctl that either returns a status
code based on whether the system is up to date or not and/or the values of
the next_cfg and cur_cfg columns (I assume that ovn-sbctl will show the
next_cfg and cur_cfg for chassis if that table is shown). That way we can
continue making progress in the case of corner failure cases and have a
way to identify which row(s) in the chassis table form the source of a
problem.

Thus (to close the loop), the test case would go ahead and make the needed
changes with ovn-nbctl and then have a gate where it waits until ovn-nbctl
reports that the system is up to date, at which point the test packets can
be sent.

Ryan

Re: [ovs-dev] [RFC] Idea for fixing "raceful" E2E ovn tests

2016-04-22 Thread Ben Pfaff
On Thu, Apr 14, 2016 at 08:37:27AM -0500, Ryan Moats wrote:
> I've pretty much become fed up with the "raceful" nature of the E2E
> ovn test cases and so I've set my self the goal of fixing them.
> 
> After some thought last night, I *think* I might have found a way
> to do it.  Now, since I'm not 100% that my idea is the cleanest way
> to fix things, I thought I'd throw the idea out on the table first
> and get some feedback before I go off and write code.
> 
> I'm thinking of the following changes:
> 
> in ovn/controller/ofctrl.c, change ofctrl_put to return a boolean
> (true if a flow is queue, false otherwise).
> 
> in ovn/controller/ovn-controller.c:
> 1. add a command line argument (--unit-test).
> 2. if ofctrl_put return true and the above command line argument
>is specified, dump a line to the log file saying that ovn-controller
>hasn't made any OF changes in the last loop
> 
> in the ovn E2E tests cases:
> 1. start ovn-controller with the --unit-test argument
> 2. instead of sleep 1 for waiting for ovn-northd/ovn-controller to
>quiesce, look at the tail of the ovn-controller logs for two
>consecutive loops where ovn-controller hasn't made any OF changes
>in the last loop

I've had a different idea for how to do this for a while now.  Let me
see...

You might not be familiar with how ovs-vsctl waits until the changes
that it makes to the Open vSwitch configuration take effect before
exiting.  It uses a sequence number protocol in the Open_vSwitch
database table.  This table (which has exactly one row) has two integer
columns, named cur_cfg and next_cfg.  Initially cur_cfg and next_cfg are
both zero.  When ovs-vsctl modifies the configuration, it also
atomically increments next_cfg.  Before ovs-vswitchd reconfigures
itself, it reads next_cfg, then when it finishes it sets cur_cfg to the
next_cfg value that it read.  ovs-vsctl waits until cur_cfg is greater
than or equal to the next_cfg that it set before it exits.

This has a number of nice properties.  It has low overhead in time and
space, it works efficiently with any number of writers, and later on it
is easy to observe what happened and when by looking at the database
log.

My thought was to extend this concept to the OVN distributed system.
For example:

* Add a next_cfg value somewhere in the OVN_Northbound
  database.  When ovn-nbctl or something else updates the
  database and wants to allow for finding out where the updates
  are propagated, it increments next_cfg.

* Add a similar next_cfg somewhere in OVN_Southbound, and make
  ovn-northd propagate the value from northbound to southbound.

* Add a cur_cfg column to the Chassis table in OVN_Southbound.
  When an ovn-controller brings its chassis up to date with a
  given configuration, it stores the next_cfg value that it just
  got up-to-date with into its own cur_cfg.

* In theory ovn-northd could propagate the current minimum value
  of cur_cfg back to the northbound database, if that's useful.
  In a real system with constant failures though it's hard for
  me to guess whether it's realistic.

This could even be used such that ovn-nbctl waits for the whole
distributed system to catch up before it exits.

What are your thoughts?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] lib: protect daemon_set_new_user against non existing user:group specs

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 04:04:26PM +0200, Christian Ehrhardt wrote:
> From the manpages of getgrnam_r (getpwnam_r is similar):
> "If no matching group record was found, these functions return 0 and
> store NULL in *result."
> 
> The code checked only against errors, but non existing users didn't set
> e != 0 therefore the code could try to set arbitrary uid/gid values.
> 
> Fixes: e91b927d lib/daemon: support --user option for all OVS daemon
> 
> Signed-off-by: Christian Ehrhardt 

Thanks for the patch.

This does not compile:

../lib/daemon-unix.c:975:18: error: invalid operands to binary expression 
('struct passwd' and 'void *')
../lib/daemon-unix.c:1018:22: error: invalid operands to binary expression 
('struct group' and 'void *')
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v4] debian : upstream_version fix

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 10:42:43AM +, Zoltán Balogh wrote:
> From:  
> 
> The Debian Policy Manual 
> (https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version) 
> says that the upstream_version may contain only alphanumerics and the 
> characters . + - : ~ (full stop, plus, hyphen, colon, tilde) and should start 
> with a digit.
> 
> Currently, the upstream_version is defined in the debian/rules file:
> 
> DEB_UPSTREAM_VERSION=$(shell dpkg-parsechangelog | sed -rne 's,^Version: 
> ([0-9]:)*([^-]+).*,\2,p')
> 
> The version number is taken from the dpkg-parsechangelog printout then the 
> first part of the version number which does not contain hyphen is filtered 
> out with sed. However the Debian Policy Manual says that hyphen is allowed in 
> the upstream_version. 
> 
> This is not a problem with current vanilla OVS debian version. But, if a 
> postfix string including a hyphen is added to the upstream_version then 
> installation of datapath-dkms package will fail. 
> 
> Reported-by: Zoltán Balogh 
> Tested-by: Zoltán Balogh 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] For those that will be in Austin on Friday....

2016-04-22 Thread Ryan Moats

I've put in for a time slot at
https://etherpad.openstack.org/p/newton-neutron-unplugged-track to talk
about networking-ovn "next steps".

If you are interested and will be around, drop by and add your +1...

Ryan (regXboi)
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 0/2] ovn: QOS updates with DSCP support

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 12:44:12PM +0530, bscha...@redhat.com wrote:
> From: Babu Shanmugam 
> 
> Following are done through this series
> 1. Changed the old approach of policing the packets. It is now shaped
>with queues. Changed the Logical_Port options for NB db
> 2. Support of DSCP marking through options field in Logical_Port table
> 
> Babu Shanmugam (2):
>   ovn: Replace the QOS policing parameters with the usage of QOS table
>   ovn: QOS DSCP markings for ports

Have you tested this?  There are at least two aspects that seem relevant
to testing.  First, propagating queuing through tunnels is somewhat
indirect and one needs to make sure that the QoS configuration actually
makes it to the physical device.  Second, HTB has a reputation for poor
quality for links above about 1 Gbps, which isn't very fast
anymore--that's why we also support HFSC.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2] packets: use flow protocol when recalculating ipv6 checksums

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 10:22:56PM +1000, Simon Horman wrote:
> When using masked actions the ipv6_proto field of an action
> to set IPv6 fields may be zero rather than the prevailing protocol
> which will result in skipping checksum recalculation.
> 
> This patch resolves the problem by relying on the protocol
> in the packet rather than that in the set field action.
> 
> A similar fix for the kernel datapath has been accepted into David Miller's
> 'net' tree as b4f70527f052 ("openvswitch: use flow protocol when
> recalculating ipv6 checksums").
> 
> Cc: Jarno Rajahalme 
> Fixes: 6d670e7f0d45 ("lib/odp: Masked set action execution and printing.")
> Signed-off-by: Simon Horman 
> ---
> While preparing this I noticed that there does seem to be some scope
> to consolidate packet_rh_present() and part of miniflow_extract().
> 
> v2
> * Updated changelog to refer to protocol in packet, as this patch does,
>   rather than the flow key, which the kernel datapath variant of the patch
>   does. This was a copy-paste error in preparing the changelog.

GCC 4.9.1 gives me the following error:

../lib/packets.c: In function 'packet_set_ipv6':
../lib/packets.c:951:9: error: 'proto' may be used uninitialized in this 
function [-Werror=maybe-uninitialized]
 packet_update_csum128(packet, proto, addr, new_addr);
 ^
../lib/packets.c:1018:13: note: 'proto' was declared here
 uint8_t proto;
 ^

Other than that, this seems like a good bug fix, thank you!

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] odp-util: Fix build warning on flags_mask.

2016-04-22 Thread Ben Pfaff
On Tue, Apr 19, 2016 at 12:06:44PM +0100, antonio.fische...@intel.com wrote:
> Fix build warning: 'flags_mask' may be used uninitialized.
> 
> Signed-off-by: Antonio Fischetti 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v1] Support port level IPFIX

2016-04-22 Thread Ben Pfaff
On Tue, Apr 19, 2016 at 03:55:10PM +0800, Daniel Benli Ye wrote:
> From: Benli Ye 
> 
> This patch enables port level IPFIX. Before this patch, OVS supported
> per bridge IPFIX and per flow IPFX, and exporting packet tunnel headers
> is only supported by bridge IPFIX. This patch adds port level IPFIX
> for easy configuration and port level IPFIX also supports exporting
> packet tunnel headers, just the same with bridge level IPFIX.
> Three main things are done in this patch.
>   1) Add a column ipfix in Port table to ref IPFIX table
>   2) Each interface in the port should use the port IPFiX configuration
>   3) A hash map is used to manage the port which is configured IPFIX
> 
> CLI to configure Port IPFIX:
>   1) Configure
>  ovs-vsctl -- set Port port0 ipfix=@i -- --id=@i create IPFIX \
>  targets=\"10.24.122.72:4739\" sampling=1 obs_domain_id=123 \
>  obs_point_id=456 cache_active_timeout=1 cache_max_flows=128 \
>  other_config:enable-tunnel-sampling=true
>   2) Clear
>  ovs-vsctl clear Port port0 ipfix

Thanks for working on IPFIX!  We don't have enough IPFIX expertise
around here, so new contributors are always welcome.

The patch lacks a Signed-off-by.  We will need it before it can be
applied.  CONTRIBUTING.md explains the format and the meaning, which is
to agree to the Developer's Certificate of Origin, which is also in
CONTRIBUTING.md; please read it.

Due to the lack of signoff, I did not do a detailed review, but I have
some general comments.  First, this patch follows the coding style
remarkably well, especially for a first patch--well done, thank you!

Second, this is the third form of configuration to be introduced for
IPFIX.  I worry that we'll end up with a fourth, and a fifth, ...  Why
does IPFIX need so many kinds of configuration; that is, why can't the
details of what packets it selects, etc., be controlled from flows in
the flow table, rather than by configuration in the database?  If that
were the case, then we would not need to so many forms of configuration:
the controller could control it through the flows over which it already
has so much control.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH net-next 2/9] libnl: nla_put_le64(): align on a 64-bit area

2016-04-22 Thread Eric Dumazet
On Fri, 2016-04-22 at 17:31 +0200, Nicolas Dichtel wrote:
> nla_data() is now aligned on a 64-bit area.
> 
> Signed-off-by: Nicolas Dichtel 
> ---
>  include/net/netlink.h |  8 +---
>  include/net/nl802154.h|  6 ++
>  net/ieee802154/nl802154.c | 13 -
>  3 files changed, 19 insertions(+), 8 deletions(-)
> 
> diff --git a/include/net/netlink.h b/include/net/netlink.h
> index 6f51a8a06498..7f6b99483ab7 100644
> --- a/include/net/netlink.h
> +++ b/include/net/netlink.h
> @@ -878,14 +878,16 @@ static inline int nla_put_net64(struct sk_buff *skb, 
> int attrtype, __be64 value)
>  }
>  
>  /**
> - * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer
> + * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and 
> align it
>   * @skb: socket buffer to add attribute to
>   * @attrtype: attribute type
>   * @value: numeric value
> + * @padattr: attribute type for the padding
>   */
> -static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 
> value)
> +static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 
> value,
> +int padattr)
>  {
> - return nla_put(skb, attrtype, sizeof(__le64), );
> + return nla_put_64bit(skb, attrtype, sizeof(__le64), , padattr);
>  }
>  

But _why_ is it needed ?

nla_put() has no alignment assumptions, it simply copies 8 bytes.

Seems this is going too far.



___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] Add change tracking documentation

2016-04-22 Thread Ansari, Shad

> +/* Change tracking.  In OVSDB, change tracking is applied at each
> +client in
> + * the IDL layer.  This means that when a client makes a request to
> +track
> + * changes on a particular table, they are essentially requesting
> + * information about the incremental changes to that table from the
> +point in
> + * time that the request is made.  Once the client clears tracked
> +changes,
> + * that information will no longer be available.  

Suggestion: I would leave it at that. The implication you are deriving may not 
apply to all usages.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2 RFC] ovn: Support native dhcp using 'continuations'

2016-04-22 Thread Ben Pfaff
On Mon, Apr 18, 2016 at 10:59:16PM +0530, Numan Siddique wrote:
> To support native dhcp in ovn
>  - A new column 'dhcp-options' is added in 'Logical_Switch' north db.
>  - A logical flow is added for each logical port to handle dhcp packets
>if the CMS has defined dhcp options in this column.
> 
> Eg.
> action=(dhcp_offer(offerip = 10.0.0.2, router = 10.0.0.1,
> server_id = 10.0.0.2, mtu = 1300, lease_time = 3600,
> netmask = 255.255.255.0); eth.dst = eth.src; eth.src = 00:00:00:00:00:03;
> ip4.dst = 10.0.0.2; ip4.src = 10.0.0.2; udp.src = 67; udp.dst = 68;
> outport = inport; inport = ""; /* Allow sending out inport. */ output;)
> 
>  - ovn-controller will convert this logical flow to a packet-in OF flow with
>'pause' flag set. The dhcp options defined in 'dhcp_offer' action
>are stored in the 'userdata'
> 
>  - When the ovn-controller receives the packet-in, it would frame a new
>dhcp packet with the dhcp options set in the 'userdata' and resume
>the packet.
> 
> TODO: Test cases and updating the necessary documentation.
> 
> Signed-off-by: Numan Siddique 

Thanks for working on this!

I get one error:

../ovn/lib/actions.c:355:34: error: implicit declaration of function 
'ip_count_cidr_bits' is invalid in C99 [-Werror,-Wimplicit-function-declaration]

All the casts in dhcp.h are unusual, any particular reason for them?

This seems pretty darned well put together for an RFC!  I'll look
forward to the final version.

There are some oddities in the documentation:  and  shouldn't
normally nest in , and usually one would terminate the  between
paragraphs (probably the documentation processor should be pickier):

+
+  OVN implements a native DHCP support which caters to the common
+  use case of providing an IP address to a booting instance by 
providing
+  stateless replies to DHCP requests based on statically configured
+  address mappings. To do this it allows a short list of DHCP options 
to be
+  configured and applied at each compute host running ovn-controller.
+
+  This column is a key/value pair with key being the cidr
+  of the subnet belonging to this logical switch and value being the
+  DHCP options defined as a valid JSON string with
+  key/value pairs as DHCP options. All the values should be strings.
+
+  
+Examples:
+  

I'm not sure there's much value in adding a specialized ovn-nbctl
command, though it's harmless.

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v4 1/1] ovn: Add column enabled to table Logical_Router

2016-04-22 Thread Ben Pfaff
On Mon, Apr 18, 2016 at 04:38:33AM -0400, JunoZhu wrote:
> This patch add column "enabled" to table Logical_Router for
>  setting router administrative state.
> 
> The type of "enabled" is bool.
> 
> If the administrative state is false, delete all the flows
> relevant to the logical router from table Logical_Flow.
> 
> Signed-off-by: Na Zhu 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH V3] add lrouter and lrport related commands to ovn-nbctl

2016-04-22 Thread Ben Pfaff
On Mon, Apr 18, 2016 at 10:04:11AM -0700, ngh...@us.ibm.com wrote:
> ovn-nbctl provides a shortcut to perform commands related lswitch, lport
> and such but it doesn't have similar commands related to logical routers
> and logical router ports. Also, 'ovn-nbctl show' is supposed to show an
> overview of database contents, which means it should show the routers
> as well. "ovn-nbctl show LSWITCH" shows the switch details, similarly
> "ovn-nbctl show LROUTER" should show the router details too. This patch
> takes care of all of these.
> 
> Modifications;
> 1) ovn-nbctl show -- will now show lrouters as well
> 2) ovn-nbctl show  -- will show the router now
> 
> New commands added:
> 3) ovn-nbctl lrouter-add [LROUTER]
> 4) ovn-nbctl lrouter-del LROUTER
> 5) ovn-nbctl lrouter-list
> 6) lrport-add LROUTER LRPORT
> 7) lrport-del LRPORT
> 8) lrport-list LROUTER
> 9) lrport-set-mac-address LRPORT [ADDRESS]
> 10) lrport-get-mac-address LRPORT
> 11) lrport-set-enabled LRPORT STATE
> 12) lrport-get-enabled LRPORT
> 
> Unit test cases have been added to test all of these modifications and
> additions.

Thanks for the patch.

Please document the new commands in ovn-nbctl.8.xml.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2 2/2] ofproto-dpif: Do not count resubmit to later tables against limit.

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 09:38:09AM -0500, Ryan Moats wrote:
> "dev"  wrote on 04/21/2016 12:50:17 PM:
> 
> > From: Ben Pfaff 
> > To: dev@openvswitch.org
> > Cc: Ben Pfaff , Guru Shetty 
> > Date: 04/21/2016 12:50 PM
> > Subject: [ovs-dev] [PATCH v2 2/2] ofproto-dpif: Do not count
> > resubmit to later tables against limit.
> > Sent by: "dev" 
> >
> > Open vSwitch must ensure that flow translation takes a finite amount of
> > time.  Until now it has implemented this by limiting the depth of
> > recursion.  The initial limit, in version 1.0.1, was no recursion at all,
> > and then over the years it has increased to 8 levels, then 16, then 32,
> > and 64 for the last few years.  Now reports are coming in that 64 levels
> > are inadequate for some OVN setups.  The natural inclination would be to
> > double the limit again to 128 levels.
> >
> > This commit attempts another approach.  Instead of increasing the limit,
> > it reduces the class of resubmits that count against the limit.  Since
> the
> > goal for the depth limit is to prevent an infinite amount of work, it's
> > not necessary to count resubmits that can't lead to infinite work.  In
> > particular, a resubmit from a table numbered x to a table y > x cannot do
> > this, because any OpenFlow switch has a finite number of tables.  Because
> > in fact a resubmit (or goto_table) from one table to a later table is the
> > most common form of an OpenFlow pipeline, I suspect that this will
> greatly
> > alleviate the pressure to increase the depth limit.
> >
> > Reported-by: Guru Shetty 
> > Signed-off-by: Ben Pfaff 
> > ---
> >  lib/ofp-actions.c| 19 +++--
> >  ofproto/ofproto-dpif-xlate.c | 66 +
> > +--
> >  ofproto/ofproto-dpif-xlate.h | 23 +--
> >  ofproto/ofproto-dpif.c   |  5 ++--
> >  ofproto/ofproto-dpif.h   |  3 +-
> >  tests/ofproto-dpif.at| 41 ++-
> >  utilities/ovs-ofctl.8.in | 28 ---
> >  7 files changed, 152 insertions(+), 33 deletions(-)
> 
> I believe I'm seeing some issues with the recursion level as well, so I'd
> like
> to get this in and see if it helps...
> 
> Acked-by: Ryan Moats 

Thanks for the ack.

Guru, I mostly wrote this up for you, does this solve the problem you
saw?

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] ovn-controller: add local address for ovn-remote connection

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 03:51:57PM +, Huang, Lei wrote:
> 
> On 4/22/16, 11:21 PM, "dev on behalf of Ben Pfaff" 
>  wrote:
> 
> >On Fri, Apr 22, 2016 at 04:14:25PM +0800, Huang Lei wrote:
> >> From: Huang Lei 
> >> 
> >> Add a local address option for ovn-controller's TCP socket which
> >> connect to southbound ovsdb-server.
> >> 
> >> In a test environment, an interface may have multiple IP addresses
> >> in same subnet, if TCP client socket doesn't call bind() explicitly,
> >> OS chooses an ip and port for it, and the IP is the primary IP of
> >> the subnet on an interface. With this patch, a secondary IP of the
> >> subnet can be used as local address of TCP connection.
> >> 
> >> Signed-off-by: Huang Lei 
> >
> >Why not just extend the syntax for streams to allow a local address (and
> >possibly local port) to be specified?
> >e.g. remote-ip:remote-port:local-ip:local-port.
> 
> Hmmm, so the possible values of ‘external-ids:ovn-remote’ will be as follow:
> external-ids:ovn-remote=tcp:192.168.0.1:6640:192.168.0.2:1
> 
> external-ids:ovn-remote=tcp:192.168.0.1:6640:192.168.0.2
> 
> external-ids:ovn-remote=tcp:192.168.0.1:6640
> 
> 
> Right?

Yes; I think it should be possible to omit fields too:
tcp:192.168.0.1::192.168.0.2:

It seems reasonable to me.  Well, maybe not reasonable per se, but
reasonable for the odd corner case where one really cares about the
local IP.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH V2] datapath-windows: Fixed buffer overflow in OvsInitVportWithNicParam

2016-04-22 Thread Nithin Raju
-Original Message-
From: dev  on behalf of Paul Boca

Date: Friday, April 22, 2016 at 12:21 AM
To: "dev@openvswitch.org" 
Subject: [ovs-dev] [PATCH V2] datapath-windows: Fixed buffer overflow in
OvsInitVportWithNicParam

>nicParam->PermanentMacAddress is 32 bytes and vport->permMacAddress is 6
>bytes
>
>Signed-off-by: Paul-Daniel Boca 

Acked-by: Nithin Raju 

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] ovn-controller: add local address for ovn-remote connection

2016-04-22 Thread Huang, Lei

On 4/22/16, 11:21 PM, "dev on behalf of Ben Pfaff"  wrote:

>On Fri, Apr 22, 2016 at 04:14:25PM +0800, Huang Lei wrote:
>> From: Huang Lei 
>> 
>> Add a local address option for ovn-controller's TCP socket which
>> connect to southbound ovsdb-server.
>> 
>> In a test environment, an interface may have multiple IP addresses
>> in same subnet, if TCP client socket doesn't call bind() explicitly,
>> OS chooses an ip and port for it, and the IP is the primary IP of
>> the subnet on an interface. With this patch, a secondary IP of the
>> subnet can be used as local address of TCP connection.
>> 
>> Signed-off-by: Huang Lei 
>
>Why not just extend the syntax for streams to allow a local address (and
>possibly local port) to be specified?
>e.g. remote-ip:remote-port:local-ip:local-port.

Hmmm, so the possible values of ‘external-ids:ovn-remote’ will be as follow:
external-ids:ovn-remote=tcp:192.168.0.1:6640:192.168.0.2:1

external-ids:ovn-remote=tcp:192.168.0.1:6640:192.168.0.2

external-ids:ovn-remote=tcp:192.168.0.1:6640


Right?

BR
Huang Lei

>___
>dev mailing list
>dev@openvswitch.org
>http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] debian, rhel: Ship ovs shared libraries and header files

2016-04-22 Thread Ben Pfaff
On Fri, Apr 15, 2016 at 01:28:18PM -0700, ec...@vmware.com wrote:
> From: Edwin Chiu 
> 
> Compile and package ovs shared libraries and create new header
> package for debian (openvswitch-dev) and rhel (openvswitch-devel).
> 
> VMware-BZ: #1556299
> Signed-off-by: Edwin Chiu 
> Co-authored-by: Harold Lim 

I've pushed this to my "reviews" repository to see if it passes all the
build checks:
https://travis-ci.org/blp/ovs-reviews/builds/125053467

I'll check back in a while to see the results.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] ofproto-dpif-xlate: Tidy up ct_mark xlate code.

2016-04-22 Thread Ben Pfaff
On Fri, Apr 15, 2016 at 11:36:05AM -0700, Joe Stringer wrote:
> Make the ct_mark netlink serialization more consistent with the way that
> ct_label is serialized.
> 
> Signed-off-by: Joe Stringer 

This is nice, I noticed and wondered about the inconsistency while
reviewing the previous patch.

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2] cksum: Refine schema cksum validation

2016-04-22 Thread Ben Pfaff
On Fri, Apr 15, 2016 at 11:08:47PM +, Rodriguez Betancourt, Esteban wrote:
> Calculates the cksum removing the cksum line using a more
> strict regex than the used previously.
> It fixes a problem when calculating the cksum of a schema that
> has fields with the substring cksum (e.g.: a checksum column),
> lines that the previous cksum calculation incorrectly removes
> before running cksum.
> Also, the tool calculate-schema-cksum is introduced. This tool
> calculates the cksum of a schema file. It could be used in other
> programs, instead of calculating the cksum in an eventually
> different way than the expected by cksum-schema-check and other
> tools.
> 
> Signed-off-by: Esteban Rodriguez Betancourt 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/2] ofproto-dpif-xlate: xlate ct_{mark, label} correctly.

2016-04-22 Thread Ben Pfaff
On Fri, Apr 15, 2016 at 11:36:04AM -0700, Joe Stringer wrote:
> When translating multiple ct actions in a row which include modification
> of ct_mark or ct_labels, these fields could be incorrectly translated
> into datapath actions, resulting in modification of these fields for
> entries when the OpenFlow rules didn't actually specify the change.
> 
> For instance, the following OpenFlow actions:
> ct(zone=1,commit,exec(set_field(1->ct_mark))),ct(zone=2,table=1),...
> 
> Would translate into the datapath actions:
> ct(zone=1,commit,mark=1),ct(zone=2,mark=1),recirc(...),...
> 
> This commit fixes the issue by zeroing the wildcards for these fields
> prior to performing nested actions translation (and restoring
> afterwards). As such, these fields do not hold both the match and the
> field modification values at the same time. As a result, the ct_mark and
> ct_labels don't leak from one ct action to the next.
> 
> Fixes: 8e53fe8cf7a1 ("Add connection tracking mark support.")
> Fixes: 9daf23484fb1 ("Add connection tracking label support.")
> Signed-off-by: Joe Stringer 

I looked this over carefully and did not spot any problems.  Thank you!

Acked-by: Ben Pfaff 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH net-next 0/9] netlink: align attributes when needed (patchset #1)

2016-04-22 Thread Nicolas Dichtel

This is the continuation of the work done to align netlink attributes
when these attributes contain some 64-bit fields.

David, if the third patch is too big (or maybe the series), I can split it.
Just tell me what you prefer.

 include/linux/netfilter/ipset/ip_set.h |  9 ++--
 include/net/netlink.h  | 60 --
 include/net/nl802154.h |  6 +++
 include/uapi/linux/fib_rules.h |  1 +
 include/uapi/linux/l2tp.h  |  1 +
 include/uapi/linux/lwtunnel.h  |  2 +
 include/uapi/linux/neighbour.h |  2 +
 include/uapi/linux/netfilter/ipset/ip_set.h|  1 +
 include/uapi/linux/netfilter/nf_tables.h   |  8 +++
 include/uapi/linux/netfilter/nfnetlink_acct.h  |  1 +
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |  3 ++
 include/uapi/linux/openvswitch.h   |  1 +
 include/uapi/linux/tcp_metrics.h   |  1 +
 include/uapi/linux/xfrm.h  |  1 +
 kernel/taskstats.c | 37 ++---
 lib/nlattr.c   |  8 ++-
 net/core/fib_rules.c   |  4 +-
 net/core/neighbour.c   | 19 +++
 net/ieee802154/nl802154.c  | 13 +++--
 net/ipv4/ip_tunnel_core.c  | 10 ++--
 net/ipv4/tcp_metrics.c |  6 ++-
 net/l2tp/l2tp_netlink.c|  3 +-
 net/netfilter/nf_conntrack_netlink.c   | 18 ---
 net/netfilter/nf_conntrack_proto_dccp.c|  4 +-
 net/netfilter/nf_tables_api.c  | 24 ++---
 net/netfilter/nf_tables_trace.c|  5 +-
 net/netfilter/nfnetlink_acct.c |  9 ++--
 net/netfilter/nft_counter.c|  6 ++-
 net/netfilter/nft_dynset.c |  3 +-
 net/netfilter/nft_limit.c  |  6 ++-
 net/openvswitch/flow_netlink.c |  5 +-
 net/xfrm/xfrm_user.c   | 10 ++--
 32 files changed, 178 insertions(+), 109 deletions(-)

Comments are welcomed,
Regards,
Nicolas
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH net-next 7/9] libnl: add nla_put_u64_64bit() helper

2016-04-22 Thread Nicolas Dichtel
With this function, nla_data() is aligned on a 64-bit area.

Signed-off-by: Nicolas Dichtel 
---
 include/net/netlink.h | 13 +
 1 file changed, 13 insertions(+)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 113b483b6ee8..e589cb3dccee 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -858,6 +858,19 @@ static inline int nla_put_u64(struct sk_buff *skb, int 
attrtype, u64 value)
 }
 
 /**
+ * nla_put_u64_64bit - Add a u64 netlink attribute to a skb and align it
+ * @skb: socket buffer to add attribute to
+ * @attrtype: attribute type
+ * @value: numeric value
+ * @padattr: attribute type for the padding
+ */
+static inline int nla_put_u64_64bit(struct sk_buff *skb, int attrtype,
+   u64 value, int padattr)
+{
+   return nla_put_64bit(skb, attrtype, sizeof(u64), , padattr);
+}
+
+/**
  * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align 
it
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
-- 
2.8.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH net-next 2/9] libnl: nla_put_le64(): align on a 64-bit area

2016-04-22 Thread Nicolas Dichtel
nla_data() is now aligned on a 64-bit area.

Signed-off-by: Nicolas Dichtel 
---
 include/net/netlink.h |  8 +---
 include/net/nl802154.h|  6 ++
 net/ieee802154/nl802154.c | 13 -
 3 files changed, 19 insertions(+), 8 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 6f51a8a06498..7f6b99483ab7 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -878,14 +878,16 @@ static inline int nla_put_net64(struct sk_buff *skb, int 
attrtype, __be64 value)
 }
 
 /**
- * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer
+ * nla_put_le64 - Add a __le64 netlink attribute to a socket buffer and align 
it
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
  * @value: numeric value
+ * @padattr: attribute type for the padding
  */
-static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value)
+static inline int nla_put_le64(struct sk_buff *skb, int attrtype, __le64 value,
+  int padattr)
 {
-   return nla_put(skb, attrtype, sizeof(__le64), );
+   return nla_put_64bit(skb, attrtype, sizeof(__le64), , padattr);
 }
 
 /**
diff --git a/include/net/nl802154.h b/include/net/nl802154.h
index 32cb3e591e07..fcab4de49951 100644
--- a/include/net/nl802154.h
+++ b/include/net/nl802154.h
@@ -138,6 +138,8 @@ enum nl802154_attrs {
NL802154_ATTR_SEC_KEY,
 #endif /* CONFIG_IEEE802154_NL802154_EXPERIMENTAL */
 
+   NL802154_ATTR_PAD,
+
__NL802154_ATTR_AFTER_LAST,
NL802154_ATTR_MAX = __NL802154_ATTR_AFTER_LAST - 1
 };
@@ -295,6 +297,7 @@ enum nl802154_dev_addr_attrs {
NL802154_DEV_ADDR_ATTR_MODE,
NL802154_DEV_ADDR_ATTR_SHORT,
NL802154_DEV_ADDR_ATTR_EXTENDED,
+   NL802154_DEV_ADDR_ATTR_PAD,
 
/* keep last */
__NL802154_DEV_ADDR_ATTR_AFTER_LAST,
@@ -320,6 +323,7 @@ enum nl802154_key_id_attrs {
NL802154_KEY_ID_ATTR_IMPLICIT,
NL802154_KEY_ID_ATTR_SOURCE_SHORT,
NL802154_KEY_ID_ATTR_SOURCE_EXTENDED,
+   NL802154_KEY_ID_ATTR_PAD,
 
/* keep last */
__NL802154_KEY_ID_ATTR_AFTER_LAST,
@@ -402,6 +406,7 @@ enum nl802154_dev {
NL802154_DEV_ATTR_EXTENDED_ADDR,
NL802154_DEV_ATTR_SECLEVEL_EXEMPT,
NL802154_DEV_ATTR_KEY_MODE,
+   NL802154_DEV_ATTR_PAD,
 
/* keep last */
__NL802154_DEV_ATTR_AFTER_LAST,
@@ -414,6 +419,7 @@ enum nl802154_devkey {
NL802154_DEVKEY_ATTR_FRAME_COUNTER,
NL802154_DEVKEY_ATTR_EXTENDED_ADDR,
NL802154_DEVKEY_ATTR_ID,
+   NL802154_DEVKEY_ATTR_PAD,
 
/* keep last */
__NL802154_DEVKEY_ATTR_AFTER_LAST,
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 16ef0d9f566e..614072064d03 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -722,7 +722,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg,
break;
case NL802154_DEV_ADDR_EXTENDED:
if (nla_put_le64(msg, NL802154_DEV_ADDR_ATTR_EXTENDED,
-desc->device_addr.extended_addr))
+desc->device_addr.extended_addr,
+NL802154_DEV_ADDR_ATTR_PAD))
return -ENOBUFS;
break;
default:
@@ -742,7 +743,8 @@ ieee802154_llsec_send_key_id(struct sk_buff *msg,
break;
case NL802154_KEY_ID_MODE_INDEX_EXTENDED:
if (nla_put_le64(msg, NL802154_KEY_ID_ATTR_SOURCE_EXTENDED,
-desc->extended_source))
+desc->extended_source,
+NL802154_KEY_ID_ATTR_PAD))
return -ENOBUFS;
break;
default:
@@ -819,7 +821,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 
seq, int flags,
 
/* address settings */
if (nla_put_le64(msg, NL802154_ATTR_EXTENDED_ADDR,
-wpan_dev->extended_addr) ||
+wpan_dev->extended_addr,
+NL802154_ATTR_PAD) ||
nla_put_le16(msg, NL802154_ATTR_SHORT_ADDR,
 wpan_dev->short_addr) ||
nla_put_le16(msg, NL802154_ATTR_PAN_ID, wpan_dev->pan_id))
@@ -1614,7 +1617,7 @@ static int nl802154_send_device(struct sk_buff *msg, u32 
cmd, u32 portid,
nla_put_le16(msg, NL802154_DEV_ATTR_SHORT_ADDR,
 dev_desc->short_addr) ||
nla_put_le64(msg, NL802154_DEV_ATTR_EXTENDED_ADDR,
-dev_desc->hwaddr) ||
+dev_desc->hwaddr, NL802154_DEV_ATTR_PAD) ||
nla_put_u8(msg, NL802154_DEV_ATTR_SECLEVEL_EXEMPT,
   dev_desc->seclevel_exempt) ||
nla_put_u32(msg, NL802154_DEV_ATTR_KEY_MODE, dev_desc->key_mode))
@@ -1778,7 

[ovs-dev] [PATCH net-next 4/9] libnl: nla_put_net64(): align on a 64-bit area

2016-04-22 Thread Nicolas Dichtel
nla_data() is now aligned on a 64-bit area.

The temporary function nla_put_be64_32bit() is removed in this patch.

Signed-off-by: Nicolas Dichtel 
---
 include/linux/netfilter/ipset/ip_set.h  |  9 ++---
 include/net/netlink.h   | 14 ++
 include/uapi/linux/netfilter/ipset/ip_set.h |  1 +
 3 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/include/linux/netfilter/ipset/ip_set.h 
b/include/linux/netfilter/ipset/ip_set.h
index f48b8a664b0f..83b9a2e0d8d4 100644
--- a/include/linux/netfilter/ipset/ip_set.h
+++ b/include/linux/netfilter/ipset/ip_set.h
@@ -351,7 +351,8 @@ ip_set_put_skbinfo(struct sk_buff *skb, struct 
ip_set_skbinfo *skbinfo)
return ((skbinfo->skbmark || skbinfo->skbmarkmask) &&
nla_put_net64(skb, IPSET_ATTR_SKBMARK,
  cpu_to_be64((u64)skbinfo->skbmark << 32 |
- skbinfo->skbmarkmask))) ||
+ skbinfo->skbmarkmask),
+ IPSET_ATTR_PAD)) ||
   (skbinfo->skbprio &&
nla_put_net32(skb, IPSET_ATTR_SKBPRIO,
  cpu_to_be32(skbinfo->skbprio))) ||
@@ -374,9 +375,11 @@ static inline bool
 ip_set_put_counter(struct sk_buff *skb, struct ip_set_counter *counter)
 {
return nla_put_net64(skb, IPSET_ATTR_BYTES,
-cpu_to_be64(ip_set_get_bytes(counter))) ||
+cpu_to_be64(ip_set_get_bytes(counter)),
+IPSET_ATTR_PAD) ||
   nla_put_net64(skb, IPSET_ATTR_PACKETS,
-cpu_to_be64(ip_set_get_packets(counter)));
+cpu_to_be64(ip_set_get_packets(counter)),
+IPSET_ATTR_PAD);
 }
 
 static inline void
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 47d7d1356fa3..066a921e7cbe 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -868,20 +868,18 @@ static inline int nla_put_be64(struct sk_buff *skb, int 
attrtype, __be64 value,
return nla_put_64bit(skb, attrtype, sizeof(__be64), , padattr);
 }
 
-static inline int nla_put_be64_32bit(struct sk_buff *skb, int attrtype,
-__be64 value)
-{
-   return nla_put(skb, attrtype, sizeof(__be64), );
-}
 /**
- * nla_put_net64 - Add 64-bit network byte order netlink attribute to a socket 
buffer
+ * nla_put_net64 - Add 64-bit network byte order nlattr to a skb and align it
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
  * @value: numeric value
+ * @padattr: attribute type for the padding
  */
-static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 
value)
+static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 
value,
+   int padattr)
 {
-   return nla_put_be64_32bit(skb, attrtype | NLA_F_NET_BYTEORDER, value);
+   return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value,
+   padattr);
 }
 
 /**
diff --git a/include/uapi/linux/netfilter/ipset/ip_set.h 
b/include/uapi/linux/netfilter/ipset/ip_set.h
index 63b2e34f1b60..ebb5154976de 100644
--- a/include/uapi/linux/netfilter/ipset/ip_set.h
+++ b/include/uapi/linux/netfilter/ipset/ip_set.h
@@ -118,6 +118,7 @@ enum {
IPSET_ATTR_SKBMARK,
IPSET_ATTR_SKBPRIO,
IPSET_ATTR_SKBQUEUE,
+   IPSET_ATTR_PAD,
__IPSET_ATTR_ADT_MAX,
 };
 #define IPSET_ATTR_ADT_MAX (__IPSET_ATTR_ADT_MAX - 1)
-- 
2.8.1

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH net-next 3/9] libnl: nla_put_be64(): align on a 64-bit area

2016-04-22 Thread Nicolas Dichtel
nla_data() is now aligned on a 64-bit area.

A temporary version (nla_put_be64_32bit()) is added for nla_put_net64().
This function is removed in the next patch.

Signed-off-by: Nicolas Dichtel 
---
 include/net/netlink.h  | 15 ++
 include/uapi/linux/fib_rules.h |  1 +
 include/uapi/linux/lwtunnel.h  |  2 ++
 include/uapi/linux/netfilter/nf_tables.h   |  8 
 include/uapi/linux/netfilter/nfnetlink_acct.h  |  1 +
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |  3 +++
 include/uapi/linux/openvswitch.h   |  1 +
 net/core/fib_rules.c   |  4 ++--
 net/ipv4/ip_tunnel_core.c  | 10 +
 net/netfilter/nf_conntrack_netlink.c   | 18 +---
 net/netfilter/nf_conntrack_proto_dccp.c|  4 +++-
 net/netfilter/nf_tables_api.c  | 24 ++
 net/netfilter/nf_tables_trace.c|  5 +++--
 net/netfilter/nfnetlink_acct.c |  9 +---
 net/netfilter/nft_counter.c|  6 --
 net/netfilter/nft_dynset.c |  3 ++-
 net/netfilter/nft_limit.c  |  6 --
 net/openvswitch/flow_netlink.c |  5 +++--
 18 files changed, 87 insertions(+), 38 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 7f6b99483ab7..47d7d1356fa3 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -856,16 +856,23 @@ static inline int nla_put_u64(struct sk_buff *skb, int 
attrtype, u64 value)
 }
 
 /**
- * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer
+ * nla_put_be64 - Add a __be64 netlink attribute to a socket buffer and align 
it
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
  * @value: numeric value
+ * @padattr: attribute type for the padding
  */
-static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value)
+static inline int nla_put_be64(struct sk_buff *skb, int attrtype, __be64 value,
+  int padattr)
 {
-   return nla_put(skb, attrtype, sizeof(__be64), );
+   return nla_put_64bit(skb, attrtype, sizeof(__be64), , padattr);
 }
 
+static inline int nla_put_be64_32bit(struct sk_buff *skb, int attrtype,
+__be64 value)
+{
+   return nla_put(skb, attrtype, sizeof(__be64), );
+}
 /**
  * nla_put_net64 - Add 64-bit network byte order netlink attribute to a socket 
buffer
  * @skb: socket buffer to add attribute to
@@ -874,7 +881,7 @@ static inline int nla_put_be64(struct sk_buff *skb, int 
attrtype, __be64 value)
  */
 static inline int nla_put_net64(struct sk_buff *skb, int attrtype, __be64 
value)
 {
-   return nla_put_be64(skb, attrtype | NLA_F_NET_BYTEORDER, value);
+   return nla_put_be64_32bit(skb, attrtype | NLA_F_NET_BYTEORDER, value);
 }
 
 /**
diff --git a/include/uapi/linux/fib_rules.h b/include/uapi/linux/fib_rules.h
index 96161b8202b5..620c8a5ddc00 100644
--- a/include/uapi/linux/fib_rules.h
+++ b/include/uapi/linux/fib_rules.h
@@ -49,6 +49,7 @@ enum {
FRA_TABLE,  /* Extended table id */
FRA_FWMASK, /* mask for netfilter mark */
FRA_OIFNAME,
+   FRA_PAD,
__FRA_MAX
 };
 
diff --git a/include/uapi/linux/lwtunnel.h b/include/uapi/linux/lwtunnel.h
index f8b01887a495..a478fe80e203 100644
--- a/include/uapi/linux/lwtunnel.h
+++ b/include/uapi/linux/lwtunnel.h
@@ -22,6 +22,7 @@ enum lwtunnel_ip_t {
LWTUNNEL_IP_TTL,
LWTUNNEL_IP_TOS,
LWTUNNEL_IP_FLAGS,
+   LWTUNNEL_IP_PAD,
__LWTUNNEL_IP_MAX,
 };
 
@@ -35,6 +36,7 @@ enum lwtunnel_ip6_t {
LWTUNNEL_IP6_HOPLIMIT,
LWTUNNEL_IP6_TC,
LWTUNNEL_IP6_FLAGS,
+   LWTUNNEL_IP6_PAD,
__LWTUNNEL_IP6_MAX,
 };
 
diff --git a/include/uapi/linux/netfilter/nf_tables.h 
b/include/uapi/linux/netfilter/nf_tables.h
index eeffde196f80..660231363bb5 100644
--- a/include/uapi/linux/netfilter/nf_tables.h
+++ b/include/uapi/linux/netfilter/nf_tables.h
@@ -182,6 +182,7 @@ enum nft_chain_attributes {
NFTA_CHAIN_USE,
NFTA_CHAIN_TYPE,
NFTA_CHAIN_COUNTERS,
+   NFTA_CHAIN_PAD,
__NFTA_CHAIN_MAX
 };
 #define NFTA_CHAIN_MAX (__NFTA_CHAIN_MAX - 1)
@@ -206,6 +207,7 @@ enum nft_rule_attributes {
NFTA_RULE_COMPAT,
NFTA_RULE_POSITION,
NFTA_RULE_USERDATA,
+   NFTA_RULE_PAD,
__NFTA_RULE_MAX
 };
 #define NFTA_RULE_MAX  (__NFTA_RULE_MAX - 1)
@@ -308,6 +310,7 @@ enum nft_set_attributes {
NFTA_SET_TIMEOUT,
NFTA_SET_GC_INTERVAL,
NFTA_SET_USERDATA,
+   NFTA_SET_PAD,
__NFTA_SET_MAX
 };
 #define NFTA_SET_MAX   (__NFTA_SET_MAX - 1)
@@ -341,6 +344,7 @@ enum nft_set_elem_attributes {
NFTA_SET_ELEM_EXPIRATION,

[ovs-dev] [PATCH net-next 6/9] libnl: nla_put_msecs(): align on a 64-bit area

2016-04-22 Thread Nicolas Dichtel
nla_data() is now aligned on a 64-bit area.

Signed-off-by: Nicolas Dichtel 
---
 include/net/netlink.h| 11 +++
 include/uapi/linux/l2tp.h|  1 +
 include/uapi/linux/neighbour.h   |  2 ++
 include/uapi/linux/tcp_metrics.h |  1 +
 net/core/neighbour.c | 19 ++-
 net/ipv4/tcp_metrics.c   |  6 --
 net/l2tp/l2tp_netlink.c  |  3 ++-
 7 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 074215a59d19..113b483b6ee8 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -106,7 +106,8 @@
  *   padattr)  add s64 attribute to skb
  *   nla_put_string(skb, type, str)add string attribute to skb
  *   nla_put_flag(skb, type)   add flag attribute to skb
- *   nla_put_msecs(skb, type, jiffies) add msecs attribute to skb
+ *   nla_put_msecs(skb, type, jiffies,
+ * padattr)add msecs attribute to skb
  *   nla_put_in_addr(skb, type, addr)  add IPv4 address attribute to skb
  *   nla_put_in6_addr(skb, type, addr) add IPv6 address attribute to skb
  *
@@ -965,16 +966,18 @@ static inline int nla_put_flag(struct sk_buff *skb, int 
attrtype)
 }
 
 /**
- * nla_put_msecs - Add a msecs netlink attribute to a socket buffer
+ * nla_put_msecs - Add a msecs netlink attribute to a skb and align it
  * @skb: socket buffer to add attribute to
  * @attrtype: attribute type
  * @njiffies: number of jiffies to convert to msecs
+ * @padattr: attribute type for the padding
  */
 static inline int nla_put_msecs(struct sk_buff *skb, int attrtype,
-   unsigned long njiffies)
+   unsigned long njiffies, int padattr)
 {
u64 tmp = jiffies_to_msecs(njiffies);
-   return nla_put(skb, attrtype, sizeof(u64), );
+
+   return nla_put_64bit(skb, attrtype, sizeof(u64), , padattr);
 }
 
 /**
diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h
index 347ef22a964e..3386a99e0397 100644
--- a/include/uapi/linux/l2tp.h
+++ b/include/uapi/linux/l2tp.h
@@ -126,6 +126,7 @@ enum {
L2TP_ATTR_IP6_DADDR,/* struct in6_addr */
L2TP_ATTR_UDP_ZERO_CSUM6_TX,/* u8 */
L2TP_ATTR_UDP_ZERO_CSUM6_RX,/* u8 */
+   L2TP_ATTR_PAD,
__L2TP_ATTR_MAX,
 };
 
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index 788655bfa0f3..bd99a8d80f36 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -128,6 +128,7 @@ enum {
NDTPA_LOCKTIME, /* u64, msecs */
NDTPA_QUEUE_LENBYTES,   /* u32 */
NDTPA_MCAST_REPROBES,   /* u32 */
+   NDTPA_PAD,
__NDTPA_MAX
 };
 #define NDTPA_MAX (__NDTPA_MAX - 1)
@@ -160,6 +161,7 @@ enum {
NDTA_PARMS, /* nested TLV NDTPA_* */
NDTA_STATS, /* struct ndt_stats, read-only */
NDTA_GC_INTERVAL,   /* u64, msecs */
+   NDTA_PAD,
__NDTA_MAX
 };
 #define NDTA_MAX (__NDTA_MAX - 1)
diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h
index 93533926035c..80ad90d0cfc2 100644
--- a/include/uapi/linux/tcp_metrics.h
+++ b/include/uapi/linux/tcp_metrics.h
@@ -40,6 +40,7 @@ enum {
TCP_METRICS_ATTR_FOPEN_COOKIE,  /* binary */
TCP_METRICS_ATTR_SADDR_IPV4,/* u32 */
TCP_METRICS_ATTR_SADDR_IPV6,/* binary */
+   TCP_METRICS_ATTR_PAD,
 
__TCP_METRICS_ATTR_MAX,
 };
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f18ae91b652e..6a395d440228 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1763,21 +1763,22 @@ static int neightbl_fill_parms(struct sk_buff *skb, 
struct neigh_parms *parms)
NEIGH_VAR(parms, MCAST_PROBES)) ||
nla_put_u32(skb, NDTPA_MCAST_REPROBES,
NEIGH_VAR(parms, MCAST_REPROBES)) ||
-   nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
+   nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time,
+ NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
- NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
+ NEIGH_VAR(parms, BASE_REACHABLE_TIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_GC_STALETIME,
- NEIGH_VAR(parms, GC_STALETIME)) ||
+ NEIGH_VAR(parms, GC_STALETIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
- NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
+ NEIGH_VAR(parms, DELAY_PROBE_TIME), NDTPA_PAD) ||
nla_put_msecs(skb, NDTPA_RETRANS_TIME,
- NEIGH_VAR(parms, RETRANS_TIME)) ||
+ NEIGH_VAR(parms, RETRANS_TIME), 

Re: [ovs-dev] [RFC PATCH] xlate: Use dp_hash for select groups.

2016-04-22 Thread Ben Pfaff
On Mon, Apr 18, 2016 at 05:42:53PM -0700, Jarno Rajahalme wrote:
> Add a new select group selection method "dp_hash", which uses minimal
> number of bits from the datapath calculated packet hash to inform the
> select group bucket selection.  This makes the datapath flows more
> generic resulting in less upcalls to userspace, but adds recirculation
> prior to group selection.
> 
> Signed-off-by: Jarno Rajahalme 

How can we explain this to users?  I don't think it tries, yet--I don't
see any documentation updates.

Thanks,

Ben.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH] datapath-windows: Add paranthesis to fix error C2275

2016-04-22 Thread Ben Pfaff
On Thu, Apr 21, 2016 at 10:44:26PM -0700, Sairam Venugopal wrote:
> Add braces around the if condition to prevent Visual Studio from giving
> the "error C2275: illegal use of this type as an expresion". This happens
> when a variable is declared after a block. This error occurs on certain
> versions of compilers.
> 
> Signed-off-by: Sairam Venugopal 

What a bizarre compiler bug!

Applied, thanks.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] How and when are IP Addresses assigned to the switch interfaces

2016-04-22 Thread Ben Pfaff
This is unreadable without any line breaks.

On Fri, Apr 22, 2016 at 03:15:03PM +, Pynbiang Hadem wrote:
> Hi,
> I'm still a learner of openvswitch and mininet. Pls help clarify my doubts.
> I am running a mininet instance with the following 
> command:mininet@mininet-vm: sudo mn --topo linear,3 --switch 
> ovsk,datapath=user --controller remote
> 
> When i run   mininet@mininet-vm:~$ sudo ovs-vsctl show    command, i get the 
> following output:0b8ed0aa-67ac-4405-af13-70249a7e8a96    Bridge "s1"        
> Controller "tcp:127.0.0.1:6633"            is_connected: true        
> Controller "ptcp:6634"        fail_mode: secure        Port "s1-eth2"         
>    Interface "s1-eth2"        Port "s1-eth1"            Interface "s1-eth1"   
>      Port "s1"            Interface "s1"                type: internal    
> Bridge "s3"    ...There is no IP information shown for the interfaces 
> "s1-eth1", "s1-eth2" etc. 
> Even the dump-flows command does not show any IP 
> Information:mininet@mininet-vm:~$ sudo ovs-ofctl -O OpenFlow13 dump-flows 
> s1OFPST_FLOW reply (OF1.3) (xid=0x2): cookie=0x0, duration=1627.369s, 
> table=0, n_packets=28, n_bytes=2668, priority=1           
> ,in_port=2,dl_dst=fe:7a:37:ab:ce:e5 actions=output:1 cookie=0x0, 
> duration=1627.368s, table=0, n_packets=27, n_bytes=2608, priority=1           
> ,in_port=1,dl_dst=32:a3:fe:e0:d9:c4 actions=output:2 cookie=0x0, 
> duration=2513.265s, table=0, n_packets=3, n_bytes=218, priority=0 a           
> ctions=CONTROLLER:65535
> However the the ping connectivity was successful with the simple_switch_13  
> apps from ryu executed as follows:       mininet@mininet-vm:~/ryu/ryu/app$ 
> ryu-manager --verbose simple_switch_13.pyHow and When are IPs assigned to the 
> switch interfaces?. ORDoes the whole thing work without IP addresses on the 
> interfaces?. If so, how can i assign IP addresses to the interfaces. I need 
> to use the IP addresses of the interfaces in my controller application.
> ThanksHadem.
> ___
> dev mailing list
> dev@openvswitch.org
> http://openvswitch.org/mailman/listinfo/dev
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 2/2] ovn-controller: add local address for ovn-remote connection

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 04:14:25PM +0800, Huang Lei wrote:
> From: Huang Lei 
> 
> Add a local address option for ovn-controller's TCP socket which
> connect to southbound ovsdb-server.
> 
> In a test environment, an interface may have multiple IP addresses
> in same subnet, if TCP client socket doesn't call bind() explicitly,
> OS chooses an ip and port for it, and the IP is the primary IP of
> the subnet on an interface. With this patch, a secondary IP of the
> subnet can be used as local address of TCP connection.
> 
> Signed-off-by: Huang Lei 

Why not just extend the syntax for streams to allow a local address (and
possibly local port) to be specified?
e.g. remote-ip:remote-port:local-ip:local-port.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH 1/2] lib: parse_sockaddr_components() ignores bad port

2016-04-22 Thread Ben Pfaff
On Fri, Apr 22, 2016 at 04:14:24PM +0800, Huang Lei wrote:
> From: Huang Lei 
> 
> Bad port number erro is ignored in parse_sockaddr_components(),
> if port number is invalid, it ouputs a error log and set port
> to 0.
> 
> Signed-off-by: Huang Lei 

Applied, thanks!
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] How and when are IP Addresses assigned to the switch interfaces

2016-04-22 Thread Pynbiang Hadem
Hi,
I'm still a learner of openvswitch and mininet. Pls help clarify my doubts.
I am running a mininet instance with the following command:mininet@mininet-vm: 
sudo mn --topo linear,3 --switch ovsk,datapath=user --controller remote

When i run   mininet@mininet-vm:~$ sudo ovs-vsctl show    command, i get the 
following output:0b8ed0aa-67ac-4405-af13-70249a7e8a96    Bridge "s1"        
Controller "tcp:127.0.0.1:6633"            is_connected: true        Controller 
"ptcp:6634"        fail_mode: secure        Port "s1-eth2"            Interface 
"s1-eth2"        Port "s1-eth1"            Interface "s1-eth1"        Port "s1" 
           Interface "s1"                type: internal    Bridge "s3"    
...There is no IP information shown for the interfaces "s1-eth1", "s1-eth2" 
etc. 
Even the dump-flows command does not show any IP 
Information:mininet@mininet-vm:~$ sudo ovs-ofctl -O OpenFlow13 dump-flows 
s1OFPST_FLOW reply (OF1.3) (xid=0x2): cookie=0x0, duration=1627.369s, table=0, 
n_packets=28, n_bytes=2668, priority=1           
,in_port=2,dl_dst=fe:7a:37:ab:ce:e5 actions=output:1 cookie=0x0, 
duration=1627.368s, table=0, n_packets=27, n_bytes=2608, priority=1           
,in_port=1,dl_dst=32:a3:fe:e0:d9:c4 actions=output:2 cookie=0x0, 
duration=2513.265s, table=0, n_packets=3, n_bytes=218, priority=0 a           
ctions=CONTROLLER:65535
However the the ping connectivity was successful with the simple_switch_13  
apps from ryu executed as follows:       mininet@mininet-vm:~/ryu/ryu/app$ 
ryu-manager --verbose simple_switch_13.pyHow and When are IPs assigned to the 
switch interfaces?. ORDoes the whole thing work without IP addresses on the 
interfaces?. If so, how can i assign IP addresses to the interfaces. I need to 
use the IP addresses of the interfaces in my controller application.
ThanksHadem.
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v8 08/16] dpif-netdev: Add pmd thread local port cache for transmission.

2016-04-22 Thread Ilya Maximets
Without this we will lost connection to non-pmd ports:
--
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 26ef612..be1f291 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3359,9 +3359,7 @@ dp_netdev_reset_pmd_threads(struct dp_netdev *dp)
 struct hmapx_node *node;
 
 HMAP_FOR_EACH (port, node, >ports) {
-if (netdev_is_pmd(port->netdev)) {
-dp_netdev_add_port_to_pmds__(dp, port, _reload);
-}
+dp_netdev_add_port_to_pmds__(dp, port, _reload);
 }
 
 HMAPX_FOR_EACH (node, _reload) {
--

Best regards, Ilya Maximets.

On 20.04.2016 01:28, diproiettod at vmware.com (Daniele Di Proietto) wrote:
> A future commit will stop using RCU for 'dp->ports' and use a mutex for
> reading/writing them.  To avoid taking a mutex in dp_execute_cb(), which
> is called in the fast path, this commit introduces a pmd thread local
> cache of ports.
> 
> The downside is that every port add/remove now needs to synchronize with
> every pmd thread.
> 
> Among the advantages, keeping a per thread port mapping could allow
> greater control over the txq assigment.
> 
> Signed-off-by: Daniele Di Proietto 
> ---
>  lib/dpif-netdev.c | 249 
> +++---
>  1 file changed, 179 insertions(+), 70 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index cedaf39..bd2249e 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -184,6 +184,7 @@ static bool dpcls_lookup(const struct dpcls *cls,
>   *
>   *dp_netdev_mutex (global)
>   *port_mutex
> + *non_pmd_mutex
>   */
>  struct dp_netdev {
>  const struct dpif_class *const class;
> @@ -379,6 +380,13 @@ struct rxq_poll {
>  struct ovs_list node;
>  };
>  
> +/* Contained by struct dp_netdev_pmd_thread's 'port_cache' or 'tx_ports'. */
> +struct tx_port {
> +odp_port_t port_no;
> +struct netdev *netdev;
> +struct hmap_node node;
> +};
> +
>  /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>   * the performance overhead of interrupt processing.  Therefore netdev can
>   * not implement rx-wait for these devices.  dpif-netdev needs to poll
> @@ -405,8 +413,8 @@ struct dp_netdev_pmd_thread {
>  
>  /* Per thread exact-match cache.  Note, the instance for cpu core
>   * NON_PMD_CORE_ID can be accessed by multiple threads, and thusly
> - * need to be protected (e.g. by 'dp_netdev_mutex').  All other
> - * instances will only be accessed by its own pmd thread. */
> + * need to be protected by 'non_pmd_mutex'.  Every other instance
> + * will only be accessed by its own pmd thread. */
>  struct emc_cache flow_cache;
>  
>  /* Classifier and Flow-Table.
> @@ -435,10 +443,20 @@ struct dp_netdev_pmd_thread {
>  atomic_int tx_qid;  /* Queue id used by this pmd thread to
>   * send packets on all netdevs */
>  
> -struct ovs_mutex poll_mutex;/* Mutex for poll_list. */
> +struct ovs_mutex port_mutex;/* Mutex for 'poll_list' and 'tx_ports'. 
> */
>  /* List of rx queues to poll. */
>  struct ovs_list poll_list OVS_GUARDED;
> -int poll_cnt;   /* Number of elemints in poll_list. */
> +/* Number of elements in 'poll_list' */
> +int poll_cnt;
> +/* Map of 'tx_port's used for transmission.  Written by the main thread,
> + * read by the pmd thread. */
> +struct hmap tx_ports OVS_GUARDED;
> +
> +/* Map of 'tx_port' used in the fast path. This is a thread-local copy of
> + * 'tx_ports'. The instance for cpu core NON_PMD_CORE_ID can be accessed
> + * by multiple threads, and thusly need to be protected by 
> 'non_pmd_mutex'.
> + * Every other instance will only be accessed by its own pmd thread. */
> +struct hmap port_cache;
>  
>  /* Only a pmd thread can write on its own 'cycles' and 'stats'.
>   * The main thread keeps 'stats_zero' and 'cycles_zero' as base
> @@ -494,7 +512,7 @@ dp_netdev_pmd_get_next(struct dp_netdev *dp, struct 
> cmap_position *pos);
>  static void dp_netdev_destroy_all_pmds(struct dp_netdev *dp);
>  static void dp_netdev_del_pmds_on_numa(struct dp_netdev *dp, int numa_id);
>  static void dp_netdev_set_pmds_on_numa(struct dp_netdev *dp, int numa_id);
> -static void dp_netdev_pmd_clear_poll_list(struct dp_netdev_pmd_thread *pmd);
> +static void dp_netdev_pmd_clear_ports(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_del_port_from_all_pmds(struct dp_netdev *dp,
>   struct dp_netdev_port *port);
>  static void
> @@ -508,6 +526,8 @@ static void dp_netdev_reset_pmd_threads(struct dp_netdev 
> *dp);
>  static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_pmd_unref(struct dp_netdev_pmd_thread *pmd);
>  static void dp_netdev_pmd_flow_flush(struct dp_netdev_pmd_thread 

Re: [ovs-dev] [PATCH v2 2/2] ofproto-dpif: Do not count resubmit to later tables against limit.

2016-04-22 Thread Ryan Moats
"dev"  wrote on 04/21/2016 12:50:17 PM:

> From: Ben Pfaff 
> To: dev@openvswitch.org
> Cc: Ben Pfaff , Guru Shetty 
> Date: 04/21/2016 12:50 PM
> Subject: [ovs-dev] [PATCH v2 2/2] ofproto-dpif: Do not count
> resubmit to later tables against limit.
> Sent by: "dev" 
>
> Open vSwitch must ensure that flow translation takes a finite amount of
> time.  Until now it has implemented this by limiting the depth of
> recursion.  The initial limit, in version 1.0.1, was no recursion at all,
> and then over the years it has increased to 8 levels, then 16, then 32,
> and 64 for the last few years.  Now reports are coming in that 64 levels
> are inadequate for some OVN setups.  The natural inclination would be to
> double the limit again to 128 levels.
>
> This commit attempts another approach.  Instead of increasing the limit,
> it reduces the class of resubmits that count against the limit.  Since
the
> goal for the depth limit is to prevent an infinite amount of work, it's
> not necessary to count resubmits that can't lead to infinite work.  In
> particular, a resubmit from a table numbered x to a table y > x cannot do
> this, because any OpenFlow switch has a finite number of tables.  Because
> in fact a resubmit (or goto_table) from one table to a later table is the
> most common form of an OpenFlow pipeline, I suspect that this will
greatly
> alleviate the pressure to increase the depth limit.
>
> Reported-by: Guru Shetty 
> Signed-off-by: Ben Pfaff 
> ---
>  lib/ofp-actions.c| 19 +++--
>  ofproto/ofproto-dpif-xlate.c | 66 +
> +--
>  ofproto/ofproto-dpif-xlate.h | 23 +--
>  ofproto/ofproto-dpif.c   |  5 ++--
>  ofproto/ofproto-dpif.h   |  3 +-
>  tests/ofproto-dpif.at| 41 ++-
>  utilities/ovs-ofctl.8.in | 28 ---
>  7 files changed, 152 insertions(+), 33 deletions(-)

I believe I'm seeing some issues with the recursion level as well, so I'd
like
to get this in and see if it helps...

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH v2 1/2] ofproto-dpif: Rename "recurse" to"indentation".

2016-04-22 Thread Ryan Moats
"dev"  wrote on 04/21/2016 12:50:16 PM:

> From: Ben Pfaff 
> To: dev@openvswitch.org
> Cc: Ben Pfaff 
> Date: 04/21/2016 12:50 PM
> Subject: [ovs-dev] [PATCH v2 1/2] ofproto-dpif: Rename "recurse" to
> "indentation".
> Sent by: "dev" 
>
> The "recurse" member of struct xlate_in and struct xlate_ctx is used for
> two purposes: to determine the amount of indentation in "ofproto/trace"
> output and to limit the depth of recursion.  An upcoming commit will
> separate these tasks, and so in preparation this commit renames "recurse"
> to "indentation".
>
> Signed-off-by: Ben Pfaff 
> ---
>  ofproto/ofproto-dpif-xlate.c | 22 +++---
>  ofproto/ofproto-dpif-xlate.h | 15 ---
>  ofproto/ofproto-dpif.c   | 43 +
+-
>  ofproto/ofproto-dpif.h   |  2 +-

Much better...

Acked-by: Ryan Moats 
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH] lib: protect daemon_set_new_user against non existing user:group specs

2016-04-22 Thread Christian Ehrhardt
From the manpages of getgrnam_r (getpwnam_r is similar):
"If no matching group record was found, these functions return 0 and
store NULL in *result."

The code checked only against errors, but non existing users didn't set
e != 0 therefore the code could try to set arbitrary uid/gid values.

Fixes: e91b927d lib/daemon: support --user option for all OVS daemon

Signed-off-by: Christian Ehrhardt 
---
 lib/daemon-unix.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/lib/daemon-unix.c b/lib/daemon-unix.c
index 182f76b..bbd1fd7 100644
--- a/lib/daemon-unix.c
+++ b/lib/daemon-unix.c
@@ -972,6 +972,9 @@ daemon_set_new_user(const char *user_spec)
 VLOG_FATAL("%s: Failed to retrive user %s's uid (%s), aborting.",
pidfile, user, ovs_strerror(e));
 }
+if (*res == NULL) {
+VLOG_FATAL("%s: user %s not found, aborting.", pidfile, user);
+}
 } else {
 /* User name is not specified, use current user.  */
 while ((e = getpwuid_r(uid, , buf, bufsize, )) == ERANGE) {
@@ -1012,6 +1015,10 @@ daemon_set_new_user(const char *user_spec)
"(%s), aborting.", pidfile, grpstr,
ovs_strerror(e));
 }
+if (*res == NULL) {
+VLOG_FATAL("%s: group %s not found, aborting.", pidfile,
+   grpstr);
+}
 
 if (gid != grp.gr_gid) {
 char **mem;
-- 
2.7.4

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH v2] packets: use flow protocol when recalculating ipv6 checksums

2016-04-22 Thread Simon Horman
When using masked actions the ipv6_proto field of an action
to set IPv6 fields may be zero rather than the prevailing protocol
which will result in skipping checksum recalculation.

This patch resolves the problem by relying on the protocol
in the packet rather than that in the set field action.

A similar fix for the kernel datapath has been accepted into David Miller's
'net' tree as b4f70527f052 ("openvswitch: use flow protocol when
recalculating ipv6 checksums").

Cc: Jarno Rajahalme 
Fixes: 6d670e7f0d45 ("lib/odp: Masked set action execution and printing.")
Signed-off-by: Simon Horman 
---
While preparing this I noticed that there does seem to be some scope
to consolidate packet_rh_present() and part of miniflow_extract().

v2
* Updated changelog to refer to protocol in packet, as this patch does,
  rather than the flow key, which the kernel datapath variant of the patch
  does. This was a copy-paste error in preparing the changelog.
---
 lib/odp-execute.c |  4 +---
 lib/packets.c | 39 +--
 lib/packets.h |  2 +-
 3 files changed, 23 insertions(+), 22 deletions(-)

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index b5204b270677..7efd9ec1d349 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -99,7 +99,6 @@ odp_set_ipv6(struct dp_packet *packet, const struct 
ovs_key_ipv6 *key,
 
 packet_set_ipv6(
 packet,
-key->ipv6_proto,
 mask_ipv6_addr(nh->ip6_src.be32, key->ipv6_src, mask->ipv6_src, sbuf),
 mask_ipv6_addr(nh->ip6_dst.be32, key->ipv6_dst, mask->ipv6_dst, dbuf),
 key->ipv6_tclass | (old_tc & ~mask->ipv6_tclass),
@@ -257,8 +256,7 @@ odp_execute_set_action(struct dp_packet *packet, const 
struct nlattr *a)
 
 case OVS_KEY_ATTR_IPV6:
 ipv6_key = nl_attr_get_unspec(a, sizeof(struct ovs_key_ipv6));
-packet_set_ipv6(packet, ipv6_key->ipv6_proto,
-ipv6_key->ipv6_src, ipv6_key->ipv6_dst,
+packet_set_ipv6(packet, ipv6_key->ipv6_src, ipv6_key->ipv6_dst,
 ipv6_key->ipv6_tclass, ipv6_key->ipv6_label,
 ipv6_key->ipv6_hlimit);
 break;
diff --git a/lib/packets.c b/lib/packets.c
index d0c0e68b534d..962fbdb6913c 100644
--- a/lib/packets.c
+++ b/lib/packets.c
@@ -837,10 +837,9 @@ packet_set_ipv4_addr(struct dp_packet *packet,
  *
  * This function assumes that L3 and L4 offsets are set in the packet. */
 static bool
-packet_rh_present(struct dp_packet *packet)
+packet_rh_present(struct dp_packet *packet, uint8_t *nexthdr)
 {
 const struct ovs_16aligned_ip6_hdr *nh;
-int nexthdr;
 size_t len;
 size_t remaining;
 uint8_t *data = dp_packet_l3(packet);
@@ -852,14 +851,14 @@ packet_rh_present(struct dp_packet *packet)
 nh = ALIGNED_CAST(struct ovs_16aligned_ip6_hdr *, data);
 data += sizeof *nh;
 remaining -= sizeof *nh;
-nexthdr = nh->ip6_nxt;
+*nexthdr = nh->ip6_nxt;
 
 while (1) {
-if ((nexthdr != IPPROTO_HOPOPTS)
-&& (nexthdr != IPPROTO_ROUTING)
-&& (nexthdr != IPPROTO_DSTOPTS)
-&& (nexthdr != IPPROTO_AH)
-&& (nexthdr != IPPROTO_FRAGMENT)) {
+if ((*nexthdr != IPPROTO_HOPOPTS)
+&& (*nexthdr != IPPROTO_ROUTING)
+&& (*nexthdr != IPPROTO_DSTOPTS)
+&& (*nexthdr != IPPROTO_AH)
+&& (*nexthdr != IPPROTO_FRAGMENT)) {
 /* It's either a terminal header (e.g., TCP, UDP) or one we
  * don't understand.  In either case, we're done with the
  * packet, so use it to fill in 'nw_proto'. */
@@ -875,34 +874,34 @@ packet_rh_present(struct dp_packet *packet)
 return false;
 }
 
-if (nexthdr == IPPROTO_AH) {
+if (*nexthdr == IPPROTO_AH) {
 /* A standard AH definition isn't available, but the fields
  * we care about are in the same location as the generic
  * option header--only the header length is calculated
  * differently. */
 const struct ip6_ext *ext_hdr = (struct ip6_ext *)data;
 
-nexthdr = ext_hdr->ip6e_nxt;
+*nexthdr = ext_hdr->ip6e_nxt;
 len = (ext_hdr->ip6e_len + 2) * 4;
-} else if (nexthdr == IPPROTO_FRAGMENT) {
+} else if (*nexthdr == IPPROTO_FRAGMENT) {
 const struct ovs_16aligned_ip6_frag *frag_hdr
 = ALIGNED_CAST(struct ovs_16aligned_ip6_frag *, data);
 
-nexthdr = frag_hdr->ip6f_nxt;
+*nexthdr = frag_hdr->ip6f_nxt;
 len = sizeof *frag_hdr;
-} else if (nexthdr == IPPROTO_ROUTING) {
+} else if (*nexthdr == IPPROTO_ROUTING) {
 const struct ip6_rthdr *rh = (struct ip6_rthdr *)data;
 
 if (rh->ip6r_segleft > 0) {
 return true;
 }
 
-nexthdr = rh->ip6r_nxt;
+  

[ovs-dev] [PATCH v4] debian : upstream_version fix

2016-04-22 Thread Zoltán Balogh
From:  

The Debian Policy Manual 
(https://www.debian.org/doc/debian-policy/ch-controlfields.html#s-f-Version) 
says that the upstream_version may contain only alphanumerics and the 
characters . + - : ~ (full stop, plus, hyphen, colon, tilde) and should start 
with a digit.

Currently, the upstream_version is defined in the debian/rules file:

DEB_UPSTREAM_VERSION=$(shell dpkg-parsechangelog | sed -rne 's,^Version: 
([0-9]:)*([^-]+).*,\2,p')

The version number is taken from the dpkg-parsechangelog printout then the 
first part of the version number which does not contain hyphen is filtered out 
with sed. However the Debian Policy Manual says that hyphen is allowed in the 
upstream_version. 

This is not a problem with current vanilla OVS debian version. But, if a 
postfix string including a hyphen is added to the upstream_version then 
installation of datapath-dkms package will fail. 

Reported-by: Zoltán Balogh 
Tested-by: Zoltán Balogh 

---

diff --git a/debian/rules b/debian/rules
index 2a70bd6..7110851 100755
--- a/debian/rules
+++ b/debian/rules
@@ -13,7 +13,7 @@
 
 PACKAGE=openvswitch
 PACKAGE_DKMS=openvswitch-datapath-dkms
-DEB_UPSTREAM_VERSION=$(shell dpkg-parsechangelog | sed -rne 's,^Version: 
([0-9]:)*([^-]+).*,\2,p')
+DEB_UPSTREAM_VERSION=$(shell dpkg-parsechangelog | sed -rne 's,^Version: 
([0-9]+:)?([0-9][a-zA-Z0-9.+:~-]*)(-[a-zA-Z0-9*.~]*),\2,p')
 
 ifneq (,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))
 PARALLEL = -j$(patsubst parallel=%,%,$(filter parallel=%,$(DEB_BUILD_OPTIONS)))

___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Helpdesk

2016-04-22 Thread Karin Kallen



Van: Karin Kallen
Verzonden: vrijdag 22 april 2016 10:44
Aan: Karin Kallen
Onderwerp: Helpdesk

Geachte Account gebruiker,

Helpdesk moet u aan te melden van de geldigheid van uw account opnieuw te 
valideren HIER opnieuw valideren 
van uw account voor HTK4S Anti-Virus / Anti-Spam en laat New mails te Nu komen

ITS helpdesk
Admin team
Verbonden met Microsoft Exchange
© 2016 Microsoft Corporation. Alle rechten voorbehouden


Pantein staat bij de Kamer van Koophandel ingeschreven onder nummer 17137132.
BELANGRIJK: Dit document en de eventuele bijlagen is/zijn uitsluitend bestemd 
voor de hierboven genoemde geadresseerde(n) en kan/kunnen vertrouwelijke 
informatie bevatten.
Indien u niet de geadresseerde bent, gelieve u zich te onthouden van 
verspreiding of vermenigvuldiging van dit bericht. Indien dit bericht kennelijk 
per vergissing naar u is verzonden, verzoeken wij u dit onverwijld te berichten 
aan de verzender van dit bericht en dit bericht per omgaande te vernietigen.
*** Barracuda Spam & Virus Firewall heeft dit bericht gescand op virussen, 
misbruik en ongewenste inhoud. ***
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] Helpdesk

2016-04-22 Thread Karin Kallen



Van: Karin Kallen
Verzonden: vrijdag 22 april 2016 10:44
Aan: Karin Kallen
Onderwerp: Helpdesk

Geachte Account gebruiker,

Helpdesk moet u aan te melden van de geldigheid van uw account opnieuw te 
valideren HIER opnieuw valideren 
van uw account voor HTK4S Anti-Virus / Anti-Spam en laat New mails te Nu komen

ITS helpdesk
Admin team
Verbonden met Microsoft Exchange
© 2016 Microsoft Corporation. Alle rechten voorbehouden


Pantein staat bij de Kamer van Koophandel ingeschreven onder nummer 17137132.
BELANGRIJK: Dit document en de eventuele bijlagen is/zijn uitsluitend bestemd 
voor de hierboven genoemde geadresseerde(n) en kan/kunnen vertrouwelijke 
informatie bevatten.
Indien u niet de geadresseerde bent, gelieve u zich te onthouden van 
verspreiding of vermenigvuldiging van dit bericht. Indien dit bericht kennelijk 
per vergissing naar u is verzonden, verzoeken wij u dit onverwijld te berichten 
aan de verzender van dit bericht en dit bericht per omgaande te vernietigen.
*** Barracuda Spam & Virus Firewall heeft dit bericht gescand op virussen, 
misbruik en ongewenste inhoud. ***
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


Re: [ovs-dev] [PATCH RFC] dpif-netdev: ACL+dpcls for Wildcard matching.

2016-04-22 Thread Fischetti, Antonio
Hi Ben,
below are 2 examples.

For both cases:
   * EMC was bypassed
   * using a bridge with 2 dpdk ports
   * I've sent data at line rate on one port and just read the received rate on 
the other port,
  regardless of lost packets.


Case A: 7 Flows

Original dpcls:   5.74 Mpps
ACL + dpcls:   7.03 Mpps

The 7 Flows were installed as:
ovs-ofctl add-flow br0 
dl_type=0x0800,nw_src=17.18.19.20,nw_dst=34.35.36.37,action=output:2
ovs-ofctl add-flow br0 dl_type=0x0800,nw_src=17.18.19.19,action=output:2
ovs-ofctl add-flow br0 dl_type=0x0800,nw_src=17.18.19.18,action=output:2
ovs-ofctl add-flow br0 dl_type=0x0800,nw_src=17.18.19.17,action=output:2
ovs-ofctl add-flow br0 dl_type=0x0800,nw_src=17.18.19.16,action=output:2
ovs-ofctl add-flow br0 dl_type=0x0800,nw_src=17.18.19.15,action=output:2
ovs-ofctl add-flow br0 
dl_type=0x0800,nw_src=17.18.19.14,nw_dst=34.35.36.37,action=output:2


Case B: 17 Flows
=
Original dpcls:   2.95 Mpps
ACL+dpcls: 4.67 Mpps

The 17 Flows were installed as:
add-flow br0 
dl_type=0x0800,nw_proto=17,nw_src=17.18.19.20,nw_dst=34.35.36.37,action=output:2
add-flow br0 
dl_type=0x0800,nw_proto=17,nw_src=17.18.19.20,nw_dst=34.35.36.38,udp_dst=4369,action=output:2
add-flow br0 
dl_type=0x0800,nw_proto=17,nw_src=17.18.19.19,udp_src=4369,action=output:2
add-flow br0 dl_type=0x0800,nw_proto=17,nw_src=17.18.19.18,action=output:2
add-flow br0 
dl_type=0x0800,nw_proto=17,nw_src=17.18.19.17,udp_dst=4369,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.16,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.15,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.14,action=output:2
add-flow br0 
dl_type=0x0800,nw_proto=17,nw_src=17.18.19.13,udp_src=4369,action=output:2
add-flow br0 dl_type=0x0800,nw_proto=17,nw_src=17.18.19.10,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.9,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.8,nw_dst=34.35.36.37,action=output:2
add-flow br0 dl_type=0x0800,nw_src=17.18.19.8,nw_dst=34.35.36.38,action=output:2
add-flow br0 dl_type=0x0800,nw_proto=17,nw_src=17.18.19.7,action=output:2
add-flow br0 dl_type=0x0800,nw_proto=17,nw_src=17.18.19.6,action=output:2
add-flow br0 dl_type=0x0800,nw_proto=17,nw_dst=34.35.36.37,action=output:2
add-flow br0 dl_type=0x0800,nw_dst=34.35.36.38,action=output:2

For more details, please let me know.

Thanks,
Antonio



> -Original Message-
> From: Ben Pfaff [mailto:b...@ovn.org]
> Sent: Thursday, April 21, 2016 7:41 PM
> To: Fischetti, Antonio 
> Cc: dev@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH RFC] dpif-netdev: ACL+dpcls for Wildcard
> matching.
> 
> On Wed, Apr 13, 2016 at 10:45:09AM +0100, antonio.fische...@intel.com
> wrote:
> > The purpose of this implementation is to improve the performance
> > of wildcard matching in user-space.
> > This RFC patch shows the basic functionality, some aspects were not
> > covered yet.
> >
> > I would like to get some feedback on whether people think integrating
> > the DPDK ACL table in this manner is potentially a good solution or not.
> >
> > DPDK ACL tables show a better performance on lookup operations than the
> > Classifier.  However their insertion time for new rules is unacceptable.
> > This solution attempts to combine the better performance of ACL lookups
> > with the lower insertion latency of the Classifier.
> 
> How much does the performance improve?
___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


[ovs-dev] [PATCH 1/2] lib: parse_sockaddr_components() ignores bad port

2016-04-22 Thread Huang Lei
From: Huang Lei 

Bad port number erro is ignored in parse_sockaddr_components(),
if port number is invalid, it ouputs a error log and set port
to 0.

Signed-off-by: Huang Lei 
---
 lib/socket-util.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/lib/socket-util.c b/lib/socket-util.c
index 5463c93..5a36f3b 100644
--- a/lib/socket-util.c
+++ b/lib/socket-util.c
@@ -359,6 +359,7 @@ parse_sockaddr_components(struct sockaddr_storage *ss,
 if (port_s && port_s[0]) {
 if (!str_to_int(port_s, 10, ) || port < 0 || port > 65535) {
 VLOG_ERR("%s: bad port number \"%s\"", s, port_s);
+goto exit;
 }
 } else {
 port = default_port;
--
1.9.1



___
dev mailing list
dev@openvswitch.org
http://openvswitch.org/mailman/listinfo/dev


  1   2   >