[ovs-dev] [RFC 14/14] ovn-controller: Incremental processing for port-group changes.

2018-07-24 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/lflow.h  |   3 +-
 ovn/controller/ovn-controller.c | 110 ++--
 2 files changed, 97 insertions(+), 16 deletions(-)

diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index 01dda1d..b90f971 100644
--- a/ovn/controller/lflow.h
+++ b/ovn/controller/lflow.h
@@ -70,7 +70,8 @@ struct uuid;
 #define LOG_PIPELINE_LEN 24
 
 enum ref_type {
-REF_TYPE_ADDRSET
+REF_TYPE_ADDRSET,
+REF_TYPE_PORTGROUP
 };
 
 /* Maintains the relationship for a pair of named resource and
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 13bc333..1f95acb 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -330,6 +330,29 @@ port_groups_init(const struct sbrec_port_group_table 
*port_group_table,
 }
 
 static void
+port_groups_update(const struct sbrec_port_group_table *port_group_table,
+   struct shash *port_groups, struct sset *new,
+   struct sset *deleted, struct sset *updated)
+{
+const struct sbrec_port_group *pg;
+SBREC_PORT_GROUP_TABLE_FOR_EACH_TRACKED (pg, port_group_table) {
+if (sbrec_port_group_is_deleted(pg)) {
+expr_const_sets_remove(port_groups, pg->name);
+sset_add(deleted, pg->name);
+} else {
+expr_const_sets_add(port_groups, pg->name,
+(const char *const *) pg->ports,
+pg->n_ports, false);
+if (sbrec_port_group_is_new(pg)) {
+sset_add(new, pg->name);
+} else {
+sset_add(updated, pg->name);
+}
+}
+}
+}
+
+static void
 update_ssl_config(const struct ovsrec_ssl_table *ssl_table)
 {
 const struct ovsrec_ssl *ssl = ovsrec_ssl_table_first(ssl_table);
@@ -748,6 +771,30 @@ en_port_groups_run(struct engine_node *node)
 node->changed = true;
 }
 
+/* Change handler of the port groups engine node for its "SB_port_group"
+ * input.  Clears the 'new', 'deleted' and 'updated' name sets, then lets
+ * port_groups_update() apply the tracked Port_Group changes to the cached
+ * port groups and refill those sets.
+ *
+ * 'node->changed' is set only when at least one port group was added,
+ * deleted or updated, and 'change_tracked' is set so that consumers know
+ * the three sets describe the change.  Always returns true. */
+static bool
+port_groups_sb_port_group_handler(struct engine_node *node)
+{
+    struct ed_type_port_groups *pg = (struct ed_type_port_groups *)node->data;
+
+    sset_clear(&pg->new);
+    sset_clear(&pg->deleted);
+    sset_clear(&pg->updated);
+
+    struct sbrec_port_group_table *pg_table =
+        (struct sbrec_port_group_table *)EN_OVSDB_GET(
+            engine_get_input("SB_port_group", node));
+
+    port_groups_update(pg_table, &pg->port_groups, &pg->new,
+                       &pg->deleted, &pg->updated);
+
+    /* Do not overwrite this with an unconditional 'true': that would make
+     * the node look changed even when no tracked port group changed. */
+    node->changed = !sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted)
+                    || !sset_is_empty(&pg->updated);
+
+    pg->change_tracked = true;
+    return true;
+}
+
 struct ed_type_runtime_data {
 /* Contains "struct local_datapath" nodes. */
 struct hmap local_datapaths;
@@ -1338,7 +1385,8 @@ flow_output_sb_multicast_group_handler(struct engine_node 
*node)
 }
 
 static bool
-flow_output_addr_sets_handler(struct engine_node *node)
+_flow_output_resource_ref_handler(struct engine_node *node,
+ enum ref_type ref_type)
 {
 struct ed_type_runtime_data *data =
 (struct ed_type_runtime_data *)engine_get_input(
@@ -1349,11 +1397,6 @@ flow_output_addr_sets_handler(struct engine_node *node)
 
 struct ed_type_addr_sets *as_data =
 (struct ed_type_addr_sets *)engine_get_input("addr_sets", node)->data;
-
-/* XXX: The change_tracked check may be added to inc-proc framework. */
-if (!as_data->change_tracked) {
-return false;
-}
 struct shash *addr_sets = _data->addr_sets;
 
 struct ed_type_port_groups *pg_data =
@@ -1412,10 +1455,35 @@ flow_output_addr_sets_handler(struct engine_node *node)
 engine_get_input("SB_logical_flow", node));
 
 bool changed;
-const char *as;
+const char *ref_name;
+struct sset *new, *updated, *deleted;
+
+switch (ref_type) {
+case REF_TYPE_ADDRSET:
+/* XXX: The change_tracked check may be added to inc-proc
+ * framework. */
+if (!as_data->change_tracked) {
+return false;
+}
+new = _data->new;
+updated = _data->updated;
+deleted = _data->deleted;
+break;
+case REF_TYPE_PORTGROUP:
+if (!pg_data->change_tracked) {
+return false;
+}
+new = _data->new;
+updated = _data->updated;
+deleted = _data->deleted;
+break;
+default:
+OVS_NOT_REACHED();
+}
+
 
-SSET_FOR_EACH (as, _data->deleted) {
-if (!lflow_handle_changed_ref(REF_TYPE_ADDRSET, as,
+SSET_FOR_EACH (ref_name, deleted) {
+if (!lflow_handle_changed_ref(ref_type, ref_name,
 sbrec_chassis_by_name,
 sbrec_multicast_group_by_name_datapath,
 sbrec_port_binding_by_name,dhcp_table,
@@ -1428,8 +1496,8 @@ flow_output_addr_sets_handler(struct engine_node 

[ovs-dev] [RFC 12/14] ovn-controller: Incremental processing for address-set changes.

2018-07-24 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/lflow.c  | 107 +
 ovn/controller/lflow.h  |  22 +
 ovn/controller/ovn-controller.c | 172 +++-
 tests/ovn.at|  75 ++
 4 files changed, 374 insertions(+), 2 deletions(-)

diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 40131aa..5cc6891 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -413,6 +413,113 @@ lflow_handle_changed_flows(
 return ret;
 }
 
+/* Re-processes every logical flow that references the resource named
+ * 'ref_name' of type 'ref_type', as recorded in 'lfrr'.
+ *
+ * The resource's reference node is detached from 'lfrr' and the
+ * bookkeeping of each referencing lflow is destroyed first, so that
+ * re-parsing the lflows can rebuild the references from scratch.  For each
+ * referencing lflow still present in 'logical_flow_table', its flows are
+ * removed from 'flow_table' and consider_logical_flow() is called again.
+ *
+ * Sets '*changed' to true if at least one lflow was re-processed
+ * successfully, and to false otherwise.  Returns false as soon as
+ * consider_logical_flow() fails for an lflow, otherwise true (including
+ * when nothing references the resource). */
+bool
+lflow_handle_changed_ref(
+    enum ref_type ref_type,
+    const char *ref_name,
+    struct ovsdb_idl_index *sbrec_chassis_by_name,
+    struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
+    struct ovsdb_idl_index *sbrec_port_binding_by_name,
+    const struct sbrec_dhcp_options_table *dhcp_options_table,
+    const struct sbrec_dhcpv6_options_table *dhcpv6_options_table,
+    const struct sbrec_logical_flow_table *logical_flow_table,
+    const struct hmap *local_datapaths,
+    const struct sbrec_chassis *chassis,
+    const struct shash *addr_sets,
+    const struct shash *port_groups,
+    const struct sset *active_tunnels,
+    const struct sset *local_lport_ids,
+    struct ovn_desired_flow_table *flow_table,
+    struct ovn_extend_table *group_table,
+    struct ovn_extend_table *meter_table,
+    struct lflow_resource_ref *lfrr,
+    uint32_t *conj_id_ofs,
+    bool *changed)
+{
+    struct ref_lflow_node *rlfn = ref_lflow_lookup(&lfrr->ref_lflow_table,
+                                                   ref_type, ref_name);
+    if (!rlfn) {
+        *changed = false;
+        return true;
+    }
+    VLOG_DBG("Handle changed lflow reference for resource type: %d,"
+             " name: %s.", ref_type, ref_name);
+    *changed = false;
+    bool ret = true;
+
+    hmap_remove(&lfrr->ref_lflow_table, &rlfn->node);
+
+    struct lflow_ref_list_node *lrln, *next;
+    /* Detach the rlfn->ref_lflow_head nodes from the lfrr table and clean
+     * up all other nodes related to the lflows that uses the resource,
+     * so that the old nodes won't interfere with updating the lfrr table
+     * when reparsing the lflows. */
+    LIST_FOR_EACH (lrln, ref_list, &rlfn->ref_lflow_head) {
+        ovs_list_remove(&lrln->lflow_list);
+        lflow_resource_destroy_lflow(lfrr, &lrln->lflow_uuid);
+    }
+
+    struct hmap dhcp_opts = HMAP_INITIALIZER(&dhcp_opts);
+    struct hmap dhcpv6_opts = HMAP_INITIALIZER(&dhcpv6_opts);
+    const struct sbrec_dhcp_options *dhcp_opt_row;
+    SBREC_DHCP_OPTIONS_TABLE_FOR_EACH (dhcp_opt_row, dhcp_options_table) {
+        dhcp_opt_add(&dhcp_opts, dhcp_opt_row->name, dhcp_opt_row->code,
+                     dhcp_opt_row->type);
+    }
+
+    const struct sbrec_dhcpv6_options *dhcpv6_opt_row;
+    SBREC_DHCPV6_OPTIONS_TABLE_FOR_EACH (dhcpv6_opt_row,
+                                         dhcpv6_options_table) {
+        dhcp_opt_add(&dhcpv6_opts, dhcpv6_opt_row->name,
+                     dhcpv6_opt_row->code, dhcpv6_opt_row->type);
+    }
+
+    struct hmap nd_ra_opts = HMAP_INITIALIZER(&nd_ra_opts);
+    nd_ra_opts_init(&nd_ra_opts);
+
+    /* Re-parse the related lflows. */
+    LIST_FOR_EACH (lrln, ref_list, &rlfn->ref_lflow_head) {
+        const struct sbrec_logical_flow *lflow =
+            sbrec_logical_flow_table_get_for_uuid(logical_flow_table,
+                                                  &lrln->lflow_uuid);
+        if (!lflow) {
+            VLOG_DBG("Reprocess lflow "UUID_FMT" for resource type: %d,"
+                     " name: %s - not found.",
+                     UUID_ARGS(&lrln->lflow_uuid),
+                     ref_type, ref_name);
+            continue;
+        }
+        VLOG_DBG("Reprocess lflow "UUID_FMT" for resource type: %d,"
+                 " name: %s.",
+                 UUID_ARGS(&lrln->lflow_uuid),
+                 ref_type, ref_name);
+        ofctrl_remove_flows(flow_table, &lrln->lflow_uuid);
+        if (!consider_logical_flow(sbrec_chassis_by_name,
+                                   sbrec_multicast_group_by_name_datapath,
+                                   sbrec_port_binding_by_name,
+                                   lflow, local_datapaths,
+                                   chassis, &dhcp_opts, &dhcpv6_opts,
+                                   &nd_ra_opts,
+                                   addr_sets, port_groups, active_tunnels,
+                                   local_lport_ids, flow_table,
+                                   group_table, meter_table,
+                                   lfrr, conj_id_ofs)) {
+            ret = false;
+            break;
+        }
+        *changed = true;
+    }
+
+    /* The detached reference node is owned by this function now: release
+     * its list entries, its name string and the node itself. */
+    LIST_FOR_EACH_SAFE (lrln, next, ref_list, &rlfn->ref_lflow_head) {
+        ovs_list_remove(&lrln->ref_list);
+        free(lrln);
+    }
+    free(rlfn->ref_name);
+    free(rlfn);
+
+    dhcp_opts_destroy(&dhcp_opts);
+    dhcp_opts_destroy(&dhcpv6_opts);
+    nd_ra_opts_destroy(&nd_ra_opts);
+    return ret;
+}
+
 static bool
 update_conj_id_ofs(uint32_t *conj_id_ofs, uint32_t n_conjs)
 {
diff --git a/ovn/controller/lflow.h b/ovn/controller/lflow.h
index bb5949b..01dda1d 100644
--- 

[ovs-dev] [RFC 13/14] ovn-controller: Split port_groups from runtime_data.

2018-07-24 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/ovn-controller.c | 91 +
 1 file changed, 73 insertions(+), 18 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 7bccb42..13bc333 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -317,7 +317,7 @@ addr_sets_update(const struct sbrec_address_set_table 
*address_set_table,
 
 /* Iterate port groups in the southbound database.  Create and update the
  * corresponding symtab entries as necessary. */
-static void
+ static void
 port_groups_init(const struct sbrec_port_group_table *port_group_table,
  struct shash *port_groups)
 {
@@ -698,6 +698,56 @@ addr_sets_sb_address_set_handler(struct engine_node *node)
 return true;
 }
 
+/* Data of the port groups engine node. */
+struct ed_type_port_groups{
+/* Port groups filled in by port_groups_init(). */
+struct shash port_groups;
+/* Set to false by en_port_groups_run(); when false, the sets below do not
+ * describe the latest change. */
+bool change_tracked;
+/* Name sets, emptied at the start of every run. */
+struct sset new;
+struct sset deleted;
+struct sset updated;
+};
+
+/* Initializes the data of the port groups engine node: an empty port
+ * group table, empty 'new'/'deleted'/'updated' sets, and change tracking
+ * turned off. */
+static void
+en_port_groups_init(struct engine_node *node)
+{
+    struct ed_type_port_groups *pg = (struct ed_type_port_groups *)node->data;
+    shash_init(&pg->port_groups);
+    pg->change_tracked = false;
+    sset_init(&pg->new);
+    sset_init(&pg->deleted);
+    sset_init(&pg->updated);
+}
+
+/* Releases everything held by the data of the port groups engine node:
+ * the cached port groups, the table itself and the three name sets. */
+static void
+en_port_groups_cleanup(struct engine_node *node)
+{
+    struct ed_type_port_groups *pg = (struct ed_type_port_groups *)node->data;
+    expr_const_sets_destroy(&pg->port_groups);
+    shash_destroy(&pg->port_groups);
+    sset_destroy(&pg->new);
+    sset_destroy(&pg->deleted);
+    sset_destroy(&pg->updated);
+}
+
+/* Full recompute of the port groups engine node: empties the three name
+ * sets, drops the cached port groups and rebuilds them from the
+ * "SB_port_group" input with port_groups_init().
+ *
+ * 'change_tracked' is set to false because the name sets do not describe
+ * what changed after a full rebuild, and the node is always marked as
+ * changed. */
+static void
+en_port_groups_run(struct engine_node *node)
+{
+    struct ed_type_port_groups *pg = (struct ed_type_port_groups *)node->data;
+
+    sset_clear(&pg->new);
+    sset_clear(&pg->deleted);
+    sset_clear(&pg->updated);
+    expr_const_sets_destroy(&pg->port_groups);
+
+    struct sbrec_port_group_table *pg_table =
+        (struct sbrec_port_group_table *)EN_OVSDB_GET(
+            engine_get_input("SB_port_group", node));
+
+    port_groups_init(pg_table, &pg->port_groups);
+
+    pg->change_tracked = false;
+    node->changed = true;
+}
+
 struct ed_type_runtime_data {
 /* Contains "struct local_datapath" nodes. */
 struct hmap local_datapaths;
@@ -713,7 +763,6 @@ struct ed_type_runtime_data {
  * _ */
 struct sset local_lport_ids;
 struct sset active_tunnels;
-struct shash port_groups;
 
 /* connection tracking zones. */
 unsigned long ct_zone_bitmap[BITMAP_N_LONGS(MAX_CT_ZONES)];
@@ -736,7 +785,6 @@ en_runtime_data_init(struct engine_node *node)
 sset_init(>local_lports);
 sset_init(>local_lport_ids);
 sset_init(>active_tunnels);
-shash_init(>port_groups);
 shash_init(>pending_ct_zones);
 simap_init(>ct_zones);
 
@@ -753,9 +801,6 @@ en_runtime_data_cleanup(struct engine_node *node)
 struct ed_type_runtime_data *data =
 (struct ed_type_runtime_data *)node->data;
 
-expr_const_sets_destroy(>port_groups);
-shash_destroy(>port_groups);
-
 sset_destroy(>local_lports);
 sset_destroy(>local_lport_ids);
 sset_destroy(>active_tunnels);
@@ -781,7 +826,6 @@ en_runtime_data_run(struct engine_node *node)
 struct sset *local_lports = >local_lports;
 struct sset *local_lport_ids = >local_lport_ids;
 struct sset *active_tunnels = >active_tunnels;
-struct shash *port_groups = >port_groups;
 unsigned long *ct_zone_bitmap = data->ct_zone_bitmap;
 struct shash *pending_ct_zones = >pending_ct_zones;
 struct simap *ct_zones = >ct_zones;
@@ -801,7 +845,6 @@ en_runtime_data_run(struct engine_node *node)
 sset_destroy(local_lports);
 sset_destroy(local_lport_ids);
 sset_destroy(active_tunnels);
-expr_const_sets_destroy(port_groups);
 sset_init(local_lports);
 sset_init(local_lport_ids);
 sset_init(active_tunnels);
@@ -867,11 +910,6 @@ en_runtime_data_run(struct engine_node *node)
 active_tunnels, local_datapaths,
 local_lports, local_lport_ids);
 
-struct sbrec_port_group_table *pg_table =
-(struct sbrec_port_group_table *)EN_OVSDB_GET(
-engine_get_input("SB_port_group", node));
-port_groups_init(pg_table, port_groups);
-
 update_ct_zones(local_lports, local_datapaths, ct_zones,
 ct_zone_bitmap, pending_ct_zones);
 
@@ -964,7 +1002,6 @@ en_flow_output_run(struct engine_node *node)
 struct sset *local_lports = _data->local_lports;
 struct sset *local_lport_ids = _data->local_lport_ids;
 struct sset *active_tunnels = _data->active_tunnels;
-struct shash *port_groups = _data->port_groups;
 struct simap *ct_zones = _data->ct_zones;
 
 struct ovsrec_open_vswitch_table *ovs_table =
@@ -984,6 +1021,11 @@ en_flow_output_run(struct engine_node *node)
 (struct ed_type_addr_sets *)engine_get_input("addr_sets", node)->data;
 struct shash *addr_sets = _data->addr_sets;
 
+struct ed_type_port_groups 

[ovs-dev] [RFC 11/14] ovn-controller: Maintain resource references for logical flows.

2018-07-24 Thread Han Zhou
This patch maintains the cross reference between logical flows and
the resources such as address sets and port groups that are used by
logical flows. This data will be needed in address set and port
group incremental processing.

Signed-off-by: Han Zhou 
---
 include/ovn/expr.h  |   5 +-
 ovn/controller/lflow.c  | 147 ++--
 ovn/controller/lflow.h  |  52 ++
 ovn/controller/ovn-controller.c |  13 +++-
 ovn/lib/actions.c   |   2 +-
 ovn/lib/expr.c  |  21 --
 ovn/utilities/ovn-trace.c   |   2 +-
 tests/test-ovn.c|   7 +-
 8 files changed, 231 insertions(+), 18 deletions(-)

diff --git a/include/ovn/expr.h b/include/ovn/expr.h
index 3995e62..c0664ac 100644
--- a/include/ovn/expr.h
+++ b/include/ovn/expr.h
@@ -65,6 +65,7 @@ struct flow;
 struct ofpbuf;
 struct shash;
 struct simap;
+struct sset;
 
 /* "Measurement level" of a field.  See "Level of Measurement" in the large
  * comment on struct expr_symbol below for more information. */
@@ -383,10 +384,12 @@ void expr_format(const struct expr *, struct ds *);
 void expr_print(const struct expr *);
 struct expr *expr_parse(struct lexer *, const struct shash *symtab,
 const struct shash *addr_sets,
-const struct shash *port_groups);
+const struct shash *port_groups,
+struct sset *addr_sets_ref);
 struct expr *expr_parse_string(const char *, const struct shash *symtab,
const struct shash *addr_sets,
const struct shash *port_groups,
+   struct sset *addr_sets_ref,
char **errorp);
 
 struct expr *expr_clone(struct expr *);
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 5f34b3a..40131aa 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -79,6 +79,7 @@ static bool consider_logical_flow(
 struct ovn_desired_flow_table *,
 struct ovn_extend_table *group_table,
 struct ovn_extend_table *meter_table,
+struct lflow_resource_ref *lfrr,
 uint32_t *conj_id_ofs);
 
 static bool
@@ -138,6 +139,128 @@ is_switch(const struct sbrec_datapath_binding *ldp)
 
 }
 
+/* Initializes 'lfrr' with two empty hash maps: 'ref_lflow_table' and
+ * 'lflow_ref_table'. */
+void
+lflow_resource_init(struct lflow_resource_ref *lfrr)
+{
+    hmap_init(&lfrr->ref_lflow_table);
+    hmap_init(&lfrr->lflow_ref_table);
+}
+
+/* Frees everything stored in 'lfrr': every node of 'ref_lflow_table'
+ * together with its name string and its list nodes, then every node of
+ * 'lflow_ref_table', and finally both hash maps.  'lfrr' itself is not
+ * freed. */
+void
+lflow_resource_destroy(struct lflow_resource_ref *lfrr)
+{
+    struct ref_lflow_node *rlfn, *rlfn_next;
+    HMAP_FOR_EACH_SAFE (rlfn, rlfn_next, node, &lfrr->ref_lflow_table) {
+        free(rlfn->ref_name);
+        struct lflow_ref_list_node *lrln, *next;
+        LIST_FOR_EACH_SAFE (lrln, next, ref_list, &rlfn->ref_lflow_head) {
+            /* Each list node is linked into two lists; unlink it from
+             * both before freeing it. */
+            ovs_list_remove(&lrln->ref_list);
+            ovs_list_remove(&lrln->lflow_list);
+            free(lrln);
+        }
+        hmap_remove(&lfrr->ref_lflow_table, &rlfn->node);
+        free(rlfn);
+    }
+    hmap_destroy(&lfrr->ref_lflow_table);
+
+    struct lflow_ref_node *lfrn, *lfrn_next;
+    HMAP_FOR_EACH_SAFE (lfrn, lfrn_next, node, &lfrr->lflow_ref_table) {
+        hmap_remove(&lfrr->lflow_ref_table, &lfrn->node);
+        free(lfrn);
+    }
+    hmap_destroy(&lfrr->lflow_ref_table);
+}
+
+/* Resets 'lfrr' to an empty state by destroying it with
+ * lflow_resource_destroy() and initializing it again with
+ * lflow_resource_init(). */
+void
+lflow_resource_clear(struct lflow_resource_ref *lfrr)
+{
+lflow_resource_destroy(lfrr);
+lflow_resource_init(lfrr);
+}
+
+/* Searches 'ref_lflow_table' for the node whose type is 'type' and whose
+ * name equals 'ref_name'.  Returns the node, or NULL if there is none. */
+static struct ref_lflow_node*
+ref_lflow_lookup(struct hmap *ref_lflow_table,
+                 enum ref_type type, const char *ref_name)
+{
+    const uint32_t hash = hash_string(ref_name, type);
+    struct ref_lflow_node *cur;
+
+    HMAP_FOR_EACH_WITH_HASH (cur, node, hash, ref_lflow_table) {
+        /* Nodes sharing the hash may still differ in type or name. */
+        if (cur->type != type || strcmp(cur->ref_name, ref_name)) {
+            continue;
+        }
+        return cur;
+    }
+    return NULL;
+}
+
+/* Searches 'lflow_ref_table' for the node whose lflow UUID equals
+ * 'lflow_uuid'.  Returns the node, or NULL if there is none. */
+static struct lflow_ref_node*
+lflow_ref_lookup(struct hmap *lflow_ref_table,
+                 const struct uuid *lflow_uuid)
+{
+    struct lflow_ref_node *lfrn;
+
+    HMAP_FOR_EACH_WITH_HASH (lfrn, node, uuid_hash(lflow_uuid),
+                             lflow_ref_table) {
+        if (uuid_equals(&lfrn->lflow_uuid, lflow_uuid)) {
+            return lfrn;
+        }
+    }
+    return NULL;
+}
+
+static void
+lflow_resource_add(struct lflow_resource_ref *lfrr, enum ref_type type,
+   const char *ref_name, const struct uuid *lflow_uuid)
+{
+struct ref_lflow_node *rlfn = ref_lflow_lookup(>ref_lflow_table,
+   type, ref_name);
+if (!rlfn) {
+rlfn = xzalloc(sizeof *rlfn);
+rlfn->node.hash = hash_string(ref_name, type);
+rlfn->type = type;
+rlfn->ref_name = xstrdup(ref_name);
+ovs_list_init(>ref_lflow_head);
+hmap_insert(>ref_lflow_table, >node, rlfn->node.hash);
+}
+
+struct lflow_ref_node *lfrn = lflow_ref_lookup(>lflow_ref_table,
+   lflow_uuid);
+if (!lfrn) {
+lfrn 

[ovs-dev] [RFC 10/14] ovn-controller: Split addr_sets from runtime_data.

2018-07-24 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/ovn-controller.c | 75 -
 1 file changed, 60 insertions(+), 15 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index e19c29f..65a5aa7 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -601,6 +601,52 @@ const char *ovs_engine_node_names[] = {
 OVS_NODES
 #undef OVS_NODE
 
+/* Data of the address sets engine node. */
+struct ed_type_addr_sets {
+/* Address sets filled in by addr_sets_init(). */
+struct shash addr_sets;
+/* Name sets, emptied at the start of every full run. */
+struct sset new;
+struct sset deleted;
+struct sset updated;
+};
+
+/* Initializes the data of the address sets engine node: an empty address
+ * set table and empty 'new'/'deleted'/'updated' sets. */
+static void
+en_addr_sets_init(struct engine_node *node)
+{
+    struct ed_type_addr_sets *as = (struct ed_type_addr_sets *)node->data;
+    shash_init(&as->addr_sets);
+    sset_init(&as->new);
+    sset_init(&as->deleted);
+    sset_init(&as->updated);
+}
+
+/* Releases everything held by the data of the address sets engine node:
+ * the cached address sets, the table itself and the three name sets. */
+static void
+en_addr_sets_cleanup(struct engine_node *node)
+{
+    struct ed_type_addr_sets *as = (struct ed_type_addr_sets *)node->data;
+    expr_const_sets_destroy(&as->addr_sets);
+    shash_destroy(&as->addr_sets);
+    sset_destroy(&as->new);
+    sset_destroy(&as->deleted);
+    sset_destroy(&as->updated);
+}
+
+/* Full recompute of the address sets engine node: empties the three name
+ * sets, drops the cached address sets and rebuilds them from the
+ * "SB_address_set" input with addr_sets_init().  The node is always marked
+ * as changed. */
+static void
+en_addr_sets_run(struct engine_node *node)
+{
+    struct ed_type_addr_sets *as = (struct ed_type_addr_sets *)node->data;
+
+    sset_clear(&as->new);
+    sset_clear(&as->deleted);
+    sset_clear(&as->updated);
+    expr_const_sets_destroy(&as->addr_sets);
+
+    struct sbrec_address_set_table *as_table =
+        (struct sbrec_address_set_table *)EN_OVSDB_GET(
+            engine_get_input("SB_address_set", node));
+
+    addr_sets_init(as_table, &as->addr_sets);
+
+    node->changed = true;
+}
 
 struct ed_type_runtime_data {
 /* Contains "struct local_datapath" nodes. */
@@ -617,7 +663,6 @@ struct ed_type_runtime_data {
  * _ */
 struct sset local_lport_ids;
 struct sset active_tunnels;
-struct shash addr_sets;
 struct shash port_groups;
 
 /* connection tracking zones. */
@@ -641,7 +686,6 @@ en_runtime_data_init(struct engine_node *node)
 sset_init(>local_lports);
 sset_init(>local_lport_ids);
 sset_init(>active_tunnels);
-shash_init(>addr_sets);
 shash_init(>port_groups);
 shash_init(>pending_ct_zones);
 simap_init(>ct_zones);
@@ -659,8 +703,6 @@ en_runtime_data_cleanup(struct engine_node *node)
 struct ed_type_runtime_data *data =
 (struct ed_type_runtime_data *)node->data;
 
-expr_const_sets_destroy(>addr_sets);
-shash_destroy(>addr_sets);
 expr_const_sets_destroy(>port_groups);
 shash_destroy(>port_groups);
 
@@ -689,7 +731,6 @@ en_runtime_data_run(struct engine_node *node)
 struct sset *local_lports = >local_lports;
 struct sset *local_lport_ids = >local_lport_ids;
 struct sset *active_tunnels = >active_tunnels;
-struct shash *addr_sets = >addr_sets;
 struct shash *port_groups = >port_groups;
 unsigned long *ct_zone_bitmap = data->ct_zone_bitmap;
 struct shash *pending_ct_zones = >pending_ct_zones;
@@ -710,7 +751,6 @@ en_runtime_data_run(struct engine_node *node)
 sset_destroy(local_lports);
 sset_destroy(local_lport_ids);
 sset_destroy(active_tunnels);
-expr_const_sets_destroy(addr_sets);
 expr_const_sets_destroy(port_groups);
 sset_init(local_lports);
 sset_init(local_lport_ids);
@@ -777,11 +817,6 @@ en_runtime_data_run(struct engine_node *node)
 active_tunnels, local_datapaths,
 local_lports, local_lport_ids);
 
-struct sbrec_address_set_table *as_table =
-(struct sbrec_address_set_table *)EN_OVSDB_GET(
-engine_get_input("SB_address_set", node));
-addr_sets_init(as_table, addr_sets);
-
 struct sbrec_port_group_table *pg_table =
 (struct sbrec_port_group_table *)EN_OVSDB_GET(
 engine_get_input("SB_port_group", node));
@@ -875,7 +910,6 @@ en_flow_output_run(struct engine_node *node)
 struct sset *local_lports = _data->local_lports;
 struct sset *local_lport_ids = _data->local_lport_ids;
 struct sset *active_tunnels = _data->active_tunnels;
-struct shash *addr_sets = _data->addr_sets;
 struct shash *port_groups = _data->port_groups;
 struct simap *ct_zones = _data->ct_zones;
 
@@ -892,6 +926,10 @@ en_flow_output_run(struct engine_node *node)
 engine_ovsdb_node_get_index(
 engine_get_input("SB_chassis", node),
 "name");
+struct ed_type_addr_sets *as_data =
+(struct ed_type_addr_sets *)engine_get_input("addr_sets", node)->data;
+struct shash *addr_sets = _data->addr_sets;
+
 const struct sbrec_chassis *chassis = NULL;
 if (chassis_id) {
 chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
@@ -985,8 +1023,10 @@ flow_output_sb_logical_flow_handler(struct engine_node 
*node)
 struct hmap *local_datapaths = >local_datapaths;
 struct sset *local_lport_ids = >local_lport_ids;
 struct sset *active_tunnels = >active_tunnels;
-struct 

[ovs-dev] [RFC 05/14] ovn-controller: Incremental logical flow processing

2018-07-24 Thread Han Zhou
Persist the flow table and implement a change handler in flow_output
for SB lflow changes.

Signed-off-by: Han Zhou 
---
 include/ovn/actions.h   |   3 +
 ovn/controller/lflow.c  | 180 --
 ovn/controller/lflow.h  |  25 -
 ovn/controller/ofctrl.c | 241 
 ovn/controller/ofctrl.h |  27 -
 ovn/controller/ovn-controller.c | 152 +++--
 ovn/controller/physical.c   |  73 ++--
 ovn/controller/physical.h   |   2 +-
 ovn/lib/actions.c   |   6 +-
 ovn/lib/extend-table.c  |  60 +++---
 ovn/lib/extend-table.h  |  16 ++-
 11 files changed, 587 insertions(+), 198 deletions(-)

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 6384651..5e4bd5b 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -520,6 +520,9 @@ struct ovnact_encode_params {
 /* A struct to figure out the meter_id for meter actions. */
 struct ovn_extend_table *meter_table;
 
+/* The logical flow uuid that drove this action. */
+struct uuid lflow_uuid;
+
 /* OVN maps each logical flow table (ltable), one-to-one, onto a physical
  * OpenFlow flow table (ptable).  A number of parameters describe this
  * mapping and data related to flow tables:
diff --git a/ovn/controller/lflow.c b/ovn/controller/lflow.c
index 8db8192..5f34b3a 100644
--- a/ovn/controller/lflow.c
+++ b/ovn/controller/lflow.c
@@ -62,7 +62,7 @@ struct condition_aux {
 const struct sset *active_tunnels;
 };
 
-static void consider_logical_flow(
+static bool consider_logical_flow(
 struct ovsdb_idl_index *sbrec_chassis_by_name,
 struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
 struct ovsdb_idl_index *sbrec_port_binding_by_name,
@@ -76,10 +76,10 @@ static void consider_logical_flow(
 const struct shash *port_groups,
 const struct sset *active_tunnels,
 const struct sset *local_lport_ids,
-uint32_t *conj_id_ofs,
-struct hmap *flow_table,
+struct ovn_desired_flow_table *,
 struct ovn_extend_table *group_table,
-struct ovn_extend_table *meter_table);
+struct ovn_extend_table *meter_table,
+uint32_t *conj_id_ofs);
 
 static bool
 lookup_port_cb(const void *aux_, const char *port_name, unsigned int *portp)
@@ -153,11 +153,11 @@ add_logical_flows(
 const struct shash *port_groups,
 const struct sset *active_tunnels,
 const struct sset *local_lport_ids,
-struct hmap *flow_table,
+struct ovn_desired_flow_table *flow_table,
 struct ovn_extend_table *group_table,
-struct ovn_extend_table *meter_table)
+struct ovn_extend_table *meter_table,
+uint32_t *conj_id_ofs)
 {
-uint32_t conj_id_ofs = 1;
 const struct sbrec_logical_flow *lflow;
 
 struct hmap dhcp_opts = HMAP_INITIALIZER(_opts);
@@ -180,14 +180,19 @@ add_logical_flows(
 nd_ra_opts_init(_ra_opts);
 
 SBREC_LOGICAL_FLOW_TABLE_FOR_EACH (lflow, logical_flow_table) {
-consider_logical_flow(sbrec_chassis_by_name,
-  sbrec_multicast_group_by_name_datapath,
-  sbrec_port_binding_by_name,
-  lflow, local_datapaths,
-  chassis, _opts, _opts, _ra_opts,
-  addr_sets, port_groups, active_tunnels,
-  local_lport_ids, _id_ofs,
-  flow_table, group_table, meter_table);
+if (!consider_logical_flow(sbrec_chassis_by_name,
+   sbrec_multicast_group_by_name_datapath,
+   sbrec_port_binding_by_name,
+   lflow, local_datapaths,
+   chassis, _opts, _opts, 
_ra_opts,
+   addr_sets, port_groups, active_tunnels,
+   local_lport_ids, flow_table,
+   group_table, meter_table,
+   conj_id_ofs)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
+VLOG_ERR_RL(, "Conjunction id overflow when processing lflow "
+UUID_FMT, UUID_ARGS(>header_.uuid));
+}
 }
 
 dhcp_opts_destroy(_opts);
@@ -195,7 +200,102 @@ add_logical_flows(
 nd_ra_opts_destroy(_ra_opts);
 }
 
-static void
+bool
+lflow_handle_changed_flows(
+struct ovsdb_idl_index *sbrec_chassis_by_name,
+struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath,
+struct ovsdb_idl_index *sbrec_port_binding_by_name,
+const struct sbrec_dhcp_options_table *dhcp_options_table,
+const struct sbrec_dhcpv6_options_table *dhcpv6_options_table,
+const struct sbrec_logical_flow_table *logical_flow_table,
+const struct hmap *local_datapaths,
+const struct sbrec_chassis *chassis,
+const struct shash 

[ovs-dev] [RFC 09/14] ovsdb-idl: Tracking - preserve data for deleted rows.

2018-07-24 Thread Han Zhou
OVSDB IDL can track changes, but for deleted rows, the data is
destroyed and only uuid is tracked. In some cases we need to
check the data of the deleted rows. This patch preserves data
for deleted rows until track clear is called.

Signed-off-by: Han Zhou 
---
 lib/ovsdb-idl-provider.h |  2 ++
 lib/ovsdb-idl.c  | 36 +---
 2 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/lib/ovsdb-idl-provider.h b/lib/ovsdb-idl-provider.h
index 2eee4fd..30d1d08 100644
--- a/lib/ovsdb-idl-provider.h
+++ b/lib/ovsdb-idl-provider.h
@@ -73,6 +73,7 @@ struct ovsdb_idl_row {
 struct ovs_list dst_arcs;   /* Backward arcs (ovsdb_idl_arc.dst_node). */
 struct ovsdb_idl_table *table; /* Containing table. */
 struct ovsdb_datum *old_datum; /* Committed data (null if orphaned). */
+bool parsed; /* Whether the row is parsed. */
 
 /* Transactional data. */
 struct ovsdb_datum *new_datum; /* Modified data (null to delete row). */
@@ -88,6 +89,7 @@ struct ovsdb_idl_row {
 unsigned int change_seqno[OVSDB_IDL_CHANGE_MAX];
 struct ovs_list track_node; /* Rows modified/added/deleted by IDL */
 unsigned long int *updated; /* Bitmap of columns updated by IDL */
+struct ovsdb_datum *tracked_old_datum; /* Old deleted data. */
 };
 
 struct ovsdb_idl_column {
diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c
index 9ab5d67..a796847 100644
--- a/lib/ovsdb-idl.c
+++ b/lib/ovsdb-idl.c
@@ -1705,7 +1705,16 @@ ovsdb_idl_db_track_clear(struct ovsdb_idl_db *db)
 }
 ovs_list_remove(>track_node);
 ovs_list_init(>track_node);
-if (ovsdb_idl_row_is_orphan(row)) {
+if (ovsdb_idl_row_is_orphan(row) && row->tracked_old_datum) {
+ovsdb_idl_row_unparse(row);
+const struct ovsdb_idl_table_class *class =
+row->table->class_;
+for (size_t c = 0; c < class->n_columns; c++) {
+ovsdb_datum_destroy(>tracked_old_datum[c],
+>columns[c].type);
+}
+free(row->tracked_old_datum);
+row->tracked_old_datum = NULL;
 free(row);
 }
 }
@@ -2432,10 +2441,14 @@ ovsdb_idl_row_parse(struct ovsdb_idl_row *row)
 const struct ovsdb_idl_table_class *class = row->table->class_;
 size_t i;
 
+if (row->parsed) {
+ovsdb_idl_row_unparse(row);
+}
 for (i = 0; i < class->n_columns; i++) {
 const struct ovsdb_idl_column *c = >columns[i];
 (c->parse)(row, >old_datum[i]);
 }
+row->parsed = true;
 }
 
 static void
@@ -2444,10 +2457,14 @@ ovsdb_idl_row_unparse(struct ovsdb_idl_row *row)
 const struct ovsdb_idl_table_class *class = row->table->class_;
 size_t i;
 
+if (!row->parsed) {
+return;
+}
 for (i = 0; i < class->n_columns; i++) {
 const struct ovsdb_idl_column *c = >columns[i];
 (c->unparse)(row);
 }
+row->parsed = false;
 }
 
 /* The OVSDB-IDL Compound Indexes feature allows for the creation of custom
@@ -2775,13 +2792,18 @@ ovsdb_idl_row_clear_old(struct ovsdb_idl_row *row)
 {
 ovs_assert(row->old_datum == row->new_datum);
 if (!ovsdb_idl_row_is_orphan(row)) {
-const struct ovsdb_idl_table_class *class = row->table->class_;
-size_t i;
+if (ovsdb_idl_track_is_set(row->table)) {
+row->tracked_old_datum = row->old_datum;
+} else {
+const struct ovsdb_idl_table_class *class = row->table->class_;
+size_t i;
 
-for (i = 0; i < class->n_columns; i++) {
-ovsdb_datum_destroy(>old_datum[i], >columns[i].type);
+for (i = 0; i < class->n_columns; i++) {
+ovsdb_datum_destroy(>old_datum[i],
+>columns[i].type);
+}
+free(row->old_datum);
 }
-free(row->old_datum);
 row->old_datum = row->new_datum = NULL;
 }
 }
@@ -2957,6 +2979,7 @@ ovsdb_idl_row_destroy_postprocess(struct ovsdb_idl_db *db)
 LIST_FOR_EACH_SAFE(row, next, track_node, >track_list) {
 if (!ovsdb_idl_track_is_set(row->table)) {
 ovs_list_remove(>track_node);
+ovsdb_idl_row_unparse(row);
 free(row);
 }
 }
@@ -2987,7 +3010,6 @@ static void
 ovsdb_idl_delete_row(struct ovsdb_idl_row *row)
 {
 ovsdb_idl_remove_from_indexes(row);
-ovsdb_idl_row_unparse(row);
 ovsdb_idl_row_clear_arcs(row, true);
 ovsdb_idl_row_clear_old(row);
 if (ovs_list_is_empty(>dst_arcs)) {
-- 
2.1.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC 07/14] ovn-controller: port-binding incremental processing for physical flows

2018-07-24 Thread Han Zhou
This patch implements a change handler for port-binding in flow_output
for physical flow computation, so that physical flow computation becomes
incremental.

This patch together with previous incremental processing engine
related changes supports incremental processing for lflow changes
and port-binding changes of lports on other HVs, which are the most
common scenarios in a cloud where workloads come up and down.

In ovn-scale-test env [1], the total execution time of creating and
binding 10k ports on 1k HVs with 40 lswitches and 8 lrouters
(5 lswitches/lrouter) decreased from 3h40m to 1h50m because of the
reduced CPU usage on HVs. The CPU time of ovn-controller for creating
and binding an additional 500 lports (on top of the 10k existing lports)
decreased by 90% compared with master.

Latency for end-to-end operations of one extra port on top of the
10k lports, from port creation until all flows are installed
on all related HVs, is also improved significantly:

before: 20.6s in total
- lsp-add: 0.4s
- wait-until port up=true: 4.8s
- --wait=hv sync: 15.4s

after: 7.3s in total
- lsp-add: 0.4s
- wait-until port up=true: 4.0s
- --wait=hv sync: 2.9s

[1] https://github.com/openvswitch/ovn-scale-test

Signed-off-by: Han Zhou 
---
 ovn/controller/ovn-controller.c |  97 ++-
 ovn/controller/physical.c   | 142 +---
 ovn/controller/physical.h   |  11 
 3 files changed, 212 insertions(+), 38 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index ed70c36..ff3cb08 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -1050,6 +1050,101 @@ flow_output_sb_logical_flow_handler(struct engine_node 
*node)
 return handled;
 }
 
+static bool
+flow_output_sb_port_binding_handler(struct engine_node *node)
+{
+struct ed_type_runtime_data *data =
+(struct ed_type_runtime_data *)engine_get_input(
+"runtime_data", node)->data;
+struct hmap *local_datapaths = >local_datapaths;
+struct sset *active_tunnels = >active_tunnels;
+struct simap *ct_zones = >ct_zones;
+
+struct ovsrec_open_vswitch_table *ovs_table =
+(struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+engine_get_input("OVS_open_vswitch", node));
+struct ovsrec_bridge_table *bridge_table =
+(struct ovsrec_bridge_table *)EN_OVSDB_GET(
+engine_get_input("OVS_bridge", node));
+const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
+const char *chassis_id = get_chassis_id(ovs_table);
+
+struct ovsdb_idl_index *sbrec_chassis_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_chassis", node),
+"name");
+const struct sbrec_chassis *chassis = NULL;
+if (chassis_id) {
+chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
+}
+ovs_assert(br_int && chassis);
+
+struct ed_type_flow_output *fo =
+(struct ed_type_flow_output *)node->data;
+struct ovn_desired_flow_table *flow_table = >flow_table;
+
+struct ovsdb_idl_index *sbrec_multicast_group_by_name_datapath =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_multicast_group", node),
+"name_datapath");
+
+struct ovsdb_idl_index *sbrec_port_binding_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_port_binding", node),
+"name");
+
+struct sbrec_port_binding_table *port_binding_table =
+(struct sbrec_port_binding_table *)EN_OVSDB_GET(
+engine_get_input("SB_port_binding", node));
+
+/* XXX: now we handles port-binding changes for physical flow processing
+ * only, but port-binding change can have impact to logical flow
+ * processing, too, in below circumstances:
+ *
+ *  - When a port-binding for a lport is inserted/deleted but the lflow
+ *using that lport doesn't change.
+ *
+ *This is likely to happen only when the lport name is used by ACL
+ *match condition, which is specified by user. Even in that case, when
+ *port is actually bound on the chassis it will trigger recompute on
+ *that chassis since ovs interface is updated. So the only situation
+ *this would have real impact is when user defines an ACL that includes
+ *lport that is not the ingress/egress lport, e.g.:
+ *
+ *to-lport 1000 'outport=="A" && inport=="B"' allow-related
+ *
+ *If "B" is created and bound after the ACL is created, the ACL may not
+ *take effect on the chassis where "A" is bound, until a recompute is
+ *triggered there later.
+ *
+ *  - When is_chassis_resident is used in lflow. In this case the port
+ *binding is patch type, since this condition is used only for lrouter
+ *ports. In current "runtime_data" handling, port-binding 

[ovs-dev] [RFC 04/14] ovn-controller: Initial use of incremental engine.

2018-07-24 Thread Han Zhou
The incremental processing engine is used to compute flows. In this
patch we create the following engine nodes:
- Engine nodes for each OVSDB table in local OVS DB and SB DB.
- runtime_data: compute and maintain intermediate result such
as local_datapath, etc.
- flow_output: compute and maintain computed flow table.

In each iteration if there is any input change then everything is
still recomputed, but there is no recompute if there is no change.
For example, pinctrl input will not trigger flow recompute any
more.

Signed-off-by: Han Zhou 
---
 ovn/controller/binding.c|   1 +
 ovn/controller/ofctrl.c |  21 +-
 ovn/controller/ofctrl.h |   5 +-
 ovn/controller/ovn-controller.c | 756 ++--
 4 files changed, 595 insertions(+), 188 deletions(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index 8d5f13d..a4b30cb 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -481,6 +481,7 @@ consider_local_datapath(struct ovsdb_idl_txn *ovnsb_idl_txn,
 update_local_lport_ids(local_lport_ids, binding_rec);
 }
 
+ovs_assert(ovnsb_idl_txn);
 if (ovnsb_idl_txn) {
 const char *vif_chassis = smap_get(_rec->options,
"requested-chassis");
diff --git a/ovn/controller/ofctrl.c b/ovn/controller/ofctrl.c
index 349de3a..134f0e5 100644
--- a/ovn/controller/ofctrl.c
+++ b/ovn/controller/ofctrl.c
@@ -477,11 +477,21 @@ recv_S_UPDATE_FLOWS(const struct ofp_header *oh, enum 
ofptype type,
 }
 }
 
+
+enum mf_field_id
+ofctrl_get_mf_field_id(void)
+{
+if (!rconn_is_connected(swconn)) {
+return 0;
+}
+return (state == S_CLEAR_FLOWS || state == S_UPDATE_FLOWS
+? mff_ovn_geneve : 0);
+}
+
 /* Runs the OpenFlow state machine against 'br_int', which is local to the
  * hypervisor on which we are running.  Attempts to negotiate a Geneve option
- * field for class OVN_GENEVE_CLASS, type OVN_GENEVE_TYPE.  If successful,
- * returns the MFF_* field ID for the option, otherwise returns 0. */
-enum mf_field_id
+ * field for class OVN_GENEVE_CLASS, type OVN_GENEVE_TYPE. */
+void
 ofctrl_run(const struct ovsrec_bridge *br_int, struct shash *pending_ct_zones)
 {
 char *target = xasprintf("unix:%s/%s.mgmt", ovs_rundir(), br_int->name);
@@ -494,7 +504,7 @@ ofctrl_run(const struct ovsrec_bridge *br_int, struct shash 
*pending_ct_zones)
 rconn_run(swconn);
 
 if (!rconn_is_connected(swconn)) {
-return 0;
+return;
 }
 if (seqno != rconn_get_connection_seqno(swconn)) {
 seqno = rconn_get_connection_seqno(swconn);
@@ -557,9 +567,6 @@ ofctrl_run(const struct ovsrec_bridge *br_int, struct shash 
*pending_ct_zones)
  * point, so ensure that we come back again without waiting. */
 poll_immediate_wake();
 }
-
-return (state == S_CLEAR_FLOWS || state == S_UPDATE_FLOWS
-? mff_ovn_geneve : 0);
 }
 
 void
diff --git a/ovn/controller/ofctrl.h b/ovn/controller/ofctrl.h
index 886b9bd..9346f5c 100644
--- a/ovn/controller/ofctrl.h
+++ b/ovn/controller/ofctrl.h
@@ -32,8 +32,9 @@ struct shash;
 /* Interface for OVN main loop. */
 void ofctrl_init(struct ovn_extend_table *group_table,
  struct ovn_extend_table *meter_table);
-enum mf_field_id ofctrl_run(const struct ovsrec_bridge *br_int,
-struct shash *pending_ct_zones);
+void ofctrl_run(const struct ovsrec_bridge *br_int,
+struct shash *pending_ct_zones);
+enum mf_field_id ofctrl_get_mf_field_id(void);
 bool ofctrl_can_put(void);
 void ofctrl_put(struct hmap *flow_table, struct shash *pending_ct_zones,
 int64_t nb_cfg);
diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 9243466..0a3a803 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -60,6 +60,7 @@
 #include "timeval.h"
 #include "timer.h"
 #include "stopwatch.h"
+#include "ovn/lib/inc-proc-eng.h"
 
 VLOG_DEFINE_THIS_MODULE(main);
 
@@ -195,15 +196,27 @@ update_sb_monitors(struct ovsdb_idl *ovnsb_idl,
 ovsdb_idl_condition_destroy();
 }
 
+static const char *
+br_int_name(const struct ovsrec_open_vswitch *cfg)
+{
+return smap_get_def(>external_ids, "ovn-bridge", DEFAULT_BRIDGE_NAME);
+}
+
 static const struct ovsrec_bridge *
 create_br_int(struct ovsdb_idl_txn *ovs_idl_txn,
-  const struct ovsrec_open_vswitch *cfg,
-  const char *bridge_name)
+  const struct ovsrec_open_vswitch_table *ovs_table)
 {
 if (!ovs_idl_txn) {
 return NULL;
 }
 
+const struct ovsrec_open_vswitch *cfg;
+cfg = ovsrec_open_vswitch_table_first(ovs_table);
+if (!cfg) {
+return NULL;
+}
+const char *bridge_name = br_int_name(cfg);
+
 ovsdb_idl_txn_add_comment(ovs_idl_txn,
 "ovn-controller: creating integration bridge '%s'", bridge_name);
 
@@ -238,8 +251,7 

[ovs-dev] [RFC 08/14] ovn-controller: incremental processing for multicast group changes

2018-07-24 Thread Han Zhou
Signed-off-by: Han Zhou 
---
 ovn/controller/ovn-controller.c | 48 -
 ovn/controller/physical.c   | 23 
 ovn/controller/physical.h   |  7 ++
 3 files changed, 77 insertions(+), 1 deletion(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index ff3cb08..e19c29f 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -1145,6 +1145,52 @@ flow_output_sb_port_binding_handler(struct engine_node 
*node)
 return true;
 }
 
+static bool
+flow_output_sb_multicast_group_handler(struct engine_node *node)
+{
+struct ed_type_runtime_data *data =
+(struct ed_type_runtime_data *)engine_get_input(
+"runtime_data", node)->data;
+struct hmap *local_datapaths = >local_datapaths;
+struct simap *ct_zones = >ct_zones;
+
+struct ovsrec_open_vswitch_table *ovs_table =
+(struct ovsrec_open_vswitch_table *)EN_OVSDB_GET(
+engine_get_input("OVS_open_vswitch", node));
+struct ovsrec_bridge_table *bridge_table =
+(struct ovsrec_bridge_table *)EN_OVSDB_GET(
+engine_get_input("OVS_bridge", node));
+const struct ovsrec_bridge *br_int = get_br_int(bridge_table, ovs_table);
+const char *chassis_id = get_chassis_id(ovs_table);
+
+struct ovsdb_idl_index *sbrec_chassis_by_name =
+engine_ovsdb_node_get_index(
+engine_get_input("SB_chassis", node),
+"name");
+const struct sbrec_chassis *chassis = NULL;
+if (chassis_id) {
+chassis = chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id);
+}
+ovs_assert(br_int && chassis);
+
+struct ed_type_flow_output *fo =
+(struct ed_type_flow_output *)node->data;
+struct ovn_desired_flow_table *flow_table = >flow_table;
+
+struct sbrec_multicast_group_table *multicast_group_table =
+(struct sbrec_multicast_group_table *)EN_OVSDB_GET(
+engine_get_input("SB_multicast_group", node));
+
+enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
+physical_handle_mc_group_changes(multicast_group_table,
+mff_ovn_geneve, chassis, ct_zones, local_datapaths,
+flow_table);
+
+node->changed = true;
+return true;
+
+}
+
 int
 main(int argc, char *argv[])
 {
@@ -1240,7 +1286,7 @@ main(int argc, char *argv[])
 
 engine_add_input(_flow_output, _sb_chassis, NULL);
 engine_add_input(_flow_output, _sb_encap, NULL);
-engine_add_input(_flow_output, _sb_multicast_group, NULL);
+engine_add_input(_flow_output, _sb_multicast_group, 
flow_output_sb_multicast_group_handler);
 engine_add_input(_flow_output, _sb_datapath_binding, NULL);
 engine_add_input(_flow_output, _sb_port_binding, 
flow_output_sb_port_binding_handler);
 engine_add_input(_flow_output, _sb_mac_binding, NULL);
diff --git a/ovn/controller/physical.c b/ovn/controller/physical.c
index e2c7f54..9405107 100644
--- a/ovn/controller/physical.c
+++ b/ovn/controller/physical.c
@@ -944,6 +944,29 @@ void physical_handle_port_binding_changes(
 }
 
 void
+physical_handle_mc_group_changes(
+const struct sbrec_multicast_group_table *multicast_group_table,
+enum mf_field_id mff_ovn_geneve,
+const struct sbrec_chassis *chassis,
+const struct simap *ct_zones,
+const struct hmap *local_datapaths,
+struct ovn_desired_flow_table *flow_table)
+{
+const struct sbrec_multicast_group *mc;
+SBREC_MULTICAST_GROUP_TABLE_FOR_EACH_TRACKED (mc, multicast_group_table) {
+if (sbrec_multicast_group_is_deleted(mc)) {
+ofctrl_remove_flows(flow_table, >header_.uuid);
+} else {
+if (!sbrec_multicast_group_is_new(mc)) {
+ofctrl_remove_flows(flow_table, >header_.uuid);
+}
+consider_mc_group(mff_ovn_geneve, ct_zones, local_datapaths,
+  chassis, mc, flow_table);
+}
+}
+}
+
+void
 physical_run(struct ovsdb_idl_index *sbrec_chassis_by_name,
  struct ovsdb_idl_index *sbrec_port_binding_by_name,
  const struct sbrec_multicast_group_table *multicast_group_table,
diff --git a/ovn/controller/physical.h b/ovn/controller/physical.h
index 097e9f5..0581669 100644
--- a/ovn/controller/physical.h
+++ b/ovn/controller/physical.h
@@ -67,4 +67,11 @@ void physical_handle_port_binding_changes(
 struct sset *active_tunnels,
 struct ovn_desired_flow_table *);
 
+void physical_handle_mc_group_changes(
+const struct sbrec_multicast_group_table *,
+enum mf_field_id mff_ovn_geneve,
+const struct sbrec_chassis *,
+const struct simap *ct_zones,
+const struct hmap *local_datapaths,
+struct ovn_desired_flow_table *);
 #endif /* ovn/physical.h */
-- 
2.1.0

___
dev mailing list
d...@openvswitch.org

[ovs-dev] [RFC 06/14] ovn-controller: runtime_data change handler for SB port-binding

2018-07-24 Thread Han Zhou
Evaluates changes to SB port-binding in the runtime_data node.
If a port-binding change has no impact on runtime_data, it will
not trigger a runtime_data change.

Signed-off-by: Han Zhou 
---
 ovn/controller/binding.c| 91 +
 ovn/controller/binding.h|  7 
 ovn/controller/ovn-controller.c | 42 ++-
 3 files changed, 139 insertions(+), 1 deletion(-)

diff --git a/ovn/controller/binding.c b/ovn/controller/binding.c
index a4b30cb..7ec6074 100644
--- a/ovn/controller/binding.c
+++ b/ovn/controller/binding.c
@@ -615,6 +615,97 @@ binding_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 hmap_destroy(_map);
 }
 
+static bool
+is_our_chassis(struct ovsdb_idl_index *sbrec_chassis_by_name,
+   const struct sbrec_chassis *chassis_rec,
+   const struct sbrec_port_binding *binding_rec,
+   const struct sset *active_tunnels,
+   const struct shash *lport_to_iface,
+   const struct sset *local_lports)
+{
+const struct ovsrec_interface *iface_rec
+= shash_find_data(lport_to_iface, binding_rec->logical_port);
+struct ovs_list *gateway_chassis = NULL;
+
+bool our_chassis = false;
+if (iface_rec
+|| (binding_rec->parent_port && binding_rec->parent_port[0] &&
+sset_contains(local_lports, binding_rec->parent_port))) {
+/* This port is in our chassis unless it is a localport. */
+if (strcmp(binding_rec->type, "localport")) {
+our_chassis = true;
+}
+} else if (!strcmp(binding_rec->type, "l2gateway")) {
+const char *chassis_id = smap_get(_rec->options,
+  "l2gateway-chassis");
+our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
+} else if (!strcmp(binding_rec->type, "chassisredirect")) {
+gateway_chassis = gateway_chassis_get_ordered(sbrec_chassis_by_name,
+  binding_rec);
+if (gateway_chassis &&
+gateway_chassis_contains(gateway_chassis, chassis_rec)) {
+
+our_chassis = gateway_chassis_is_active(
+gateway_chassis, chassis_rec, active_tunnels);
+
+}
+gateway_chassis_destroy(gateway_chassis);
+} else if (!strcmp(binding_rec->type, "l3gateway")) {
+const char *chassis_id = smap_get(_rec->options,
+  "l3gateway-chassis");
+our_chassis = chassis_id && !strcmp(chassis_id, chassis_rec->name);
+} else if (!strcmp(binding_rec->type, "localnet")) {
+our_chassis = false;
+}
+
+return our_chassis;
+}
+
+/* Returns true if port-binding changes potentially require flow changes on
+ * the current chassis. Returns false if we are sure there is no impact. */
+bool
+binding_evaluate_port_binding_changes(
+struct ovsdb_idl_index *sbrec_chassis_by_name,
+const struct sbrec_port_binding_table *pb_table,
+const struct ovsrec_bridge *br_int,
+const struct sbrec_chassis *chassis_rec,
+struct sset *active_tunnels,
+struct sset *local_lports)
+{
+if (!chassis_rec) {
+return true;
+}
+
+const struct sbrec_port_binding *binding_rec;
+struct shash lport_to_iface = SHASH_INITIALIZER(_to_iface);
+struct sset egress_ifaces = SSET_INITIALIZER(_ifaces);
+if (br_int) {
+get_local_iface_ids(br_int, _to_iface, local_lports,
+_ifaces);
+}
+SBREC_PORT_BINDING_TABLE_FOR_EACH_TRACKED (binding_rec, pb_table) {
+/* XXX: currently OVSDB change tracking doesn't support getting old
+ * data when the operation is update, so if a port-binding moved from
+ * this chassis to another, we would not know it with this check.
+ * However, if the port is unbound from this chassis, the local ovsdb
+ * interface table will be updated, which will trigger recompute.
+ * If the port is still bound on this chassis, then below check
+ * is_our_chassis() will take care of that case. */
+if (binding_rec->chassis == chassis_rec) {
+return true;
+}
+if (is_our_chassis(sbrec_chassis_by_name, chassis_rec, binding_rec,
+   active_tunnels, _to_iface, local_lports)
+|| !strcmp(binding_rec->type, "patch")
+|| !strcmp(binding_rec->type, "localport")
+|| !strcmp(binding_rec->type, "vtep")
+|| !strcmp(binding_rec->type, "localnet")) {
+return true;
+}
+}
+return false;
+}
+
 /* Returns true if the database is all cleaned up, false if more work is
  * required. */
 bool
diff --git a/ovn/controller/binding.h b/ovn/controller/binding.h
index 837e109..6c62cc5 100644
--- a/ovn/controller/binding.h
+++ b/ovn/controller/binding.h
@@ -48,5 +48,12 @@ void binding_run(struct ovsdb_idl_txn *ovnsb_idl_txn,
 bool 

[ovs-dev] [RFC 02/14] ovn-controller: Incremental processing engine

2018-07-24 Thread Han Zhou
This patch implements the engine which will be used in future patches
for ovn-controller incremental processing.

Signed-off-by: Han Zhou 
---
 ovn/lib/automake.mk|   4 +-
 ovn/lib/inc-proc-eng.c | 201 +
 ovn/lib/inc-proc-eng.h | 240 +
 3 files changed, 444 insertions(+), 1 deletion(-)
 create mode 100644 ovn/lib/inc-proc-eng.c
 create mode 100644 ovn/lib/inc-proc-eng.h

diff --git a/ovn/lib/automake.mk b/ovn/lib/automake.mk
index 6178fc2..c1d37c5 100644
--- a/ovn/lib/automake.mk
+++ b/ovn/lib/automake.mk
@@ -17,7 +17,9 @@ ovn_lib_libovn_la_SOURCES = \
ovn/lib/ovn-util.c \
ovn/lib/ovn-util.h \
ovn/lib/logical-fields.c \
-   ovn/lib/logical-fields.h
+   ovn/lib/logical-fields.h \
+   ovn/lib/inc-proc-eng.c \
+   ovn/lib/inc-proc-eng.h
 nodist_ovn_lib_libovn_la_SOURCES = \
ovn/lib/ovn-nb-idl.c \
ovn/lib/ovn-nb-idl.h \
diff --git a/ovn/lib/inc-proc-eng.c b/ovn/lib/inc-proc-eng.c
new file mode 100644
index 000..1ddea1a
--- /dev/null
+++ b/ovn/lib/inc-proc-eng.c
@@ -0,0 +1,201 @@
+/*
+ * Copyright (c) 2018 eBay Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "lib/util.h"
+#include "openvswitch/dynamic-string.h"
+#include "openvswitch/hmap.h"
+#include "openvswitch/vlog.h"
+#include "inc-proc-eng.h"
+
+VLOG_DEFINE_THIS_MODULE(inc_proc_eng);
+
+static bool engine_force_recompute = false;
+static const struct engine_context *engine_context;
+
+void
+engine_set_force_recompute(bool val)
+{
+engine_force_recompute = val;
+}
+
+const struct engine_context *
+engine_get_context(void)
+{
+return engine_context;
+}
+
+void
+engine_set_context(const struct engine_context *ctx)
+{
+engine_context = ctx;
+}
+
+void
+engine_init(struct engine_node *node)
+{
+for (size_t i = 0; i < node->n_inputs; i++) {
+engine_init(node->inputs[i].node);
+}
+if (node->init) {
+node->init(node);
+}
+}
+
+void
+engine_cleanup(struct engine_node *node)
+{
+for (size_t i = 0; i < node->n_inputs; i++) {
+engine_cleanup(node->inputs[i].node);
+}
+if (node->cleanup) {
+node->cleanup(node);
+}
+}
+
+struct engine_node *
+engine_get_input(const char *input_name, struct engine_node *node)
+{
+size_t i;
+for (i = 0; i < node->n_inputs; i++) {
+if (!strcmp(node->inputs[i].node->name, input_name)) {
+return node->inputs[i].node;
+}
+}
+OVS_NOT_REACHED();
+return NULL;
+}
+
+void
+engine_add_input(struct engine_node *node, struct engine_node *input,
+ bool (*change_handler)(struct engine_node *))
+{
+ovs_assert(node->n_inputs < ENGINE_MAX_INPUT);
+node->inputs[node->n_inputs].node = input;
+node->inputs[node->n_inputs].change_handler = change_handler;
+node->n_inputs ++;
+}
+
+struct ovsdb_idl_index *
+engine_ovsdb_node_get_index(struct engine_node *node, const char *name)
+{
+struct ed_type_ovsdb_table *ed = (struct ed_type_ovsdb_table *)node->data;
+for (size_t i = 0; i < ed->n_indexes; i++) {
+if (!strcmp(ed->indexes[i].name, name)) {
+return ed->indexes[i].index;
+}
+}
+OVS_NOT_REACHED();
+return NULL;
+}
+
+void
+engine_ovsdb_node_add_index(struct engine_node *node, const char *name,
+struct ovsdb_idl_index *index)
+{
+struct ed_type_ovsdb_table *ed = (struct ed_type_ovsdb_table *)node->data;
+ovs_assert(ed->n_indexes < ENGINE_MAX_OVSDB_INDEX);
+
+ed->indexes[ed->n_indexes].name = name;
+ed->indexes[ed->n_indexes].index = index;
+ed->n_indexes ++;
+}
+
+void
+engine_run(struct engine_node *node, uint64_t run_id)
+{
+if (node->run_id == run_id) {
+return;
+}
+node->run_id = run_id;
+
+node->changed = false;
+if (!node->n_inputs) {
+node->run(node);
+VLOG_DBG("node: %s, changed: %d", node->name, node->changed);
+return;
+}
+
+for (size_t i = 0; i < node->n_inputs; i++) {
+engine_run(node->inputs[i].node, run_id);
+}
+
+bool need_compute = false;
+bool need_recompute = false;
+
+if (engine_force_recompute) {
+need_recompute = true;
+} else {
+for (size_t i = 0; i < node->n_inputs; i++) {
+if (node->inputs[i].node->changed) {
+

[ovs-dev] [RFC 01/14] ovsdb-idlc.in: Support more interfaces for passing pointers of individual tables.

2018-07-24 Thread Han Zhou
This is a follow-up patch for commit 0eb1e37c, to add more interfaces
that support passing around pointers to individual tables, which will
be used in incremental processing.

Signed-off-by: Han Zhou 
---
 ovsdb/ovsdb-idlc.in | 25 +
 1 file changed, 25 insertions(+)

diff --git a/ovsdb/ovsdb-idlc.in b/ovsdb/ovsdb-idlc.in
index ee655f7..1c9483c 100755
--- a/ovsdb/ovsdb-idlc.in
+++ b/ovsdb/ovsdb-idlc.in
@@ -251,6 +251,7 @@ const struct %(s)s *%(s)s_table_first(const struct 
%(s)s_table *);
  (ROW) = (NEXT))
 
 const struct %(s)s *%(s)s_get_for_uuid(const struct ovsdb_idl *, const struct 
uuid *);
+const struct %(s)s *%(s)s_table_get_for_uuid(const struct %(s)s_table *, const 
struct uuid *);
 const struct %(s)s *%(s)s_first(const struct ovsdb_idl *);
 const struct %(s)s *%(s)s_next(const struct %(s)s *);
 #define %(S)s_FOR_EACH(ROW, IDL) \\
@@ -271,6 +272,13 @@ const struct %(s)s *%(s)s_track_get_next(const struct 
%(s)s *);
  (ROW); \\
  (ROW) = %(s)s_track_get_next(ROW))
 
+const struct %(s)s *%(s)s_table_track_get_first(const struct %(s)s_table *);
+#define %(S)s_TABLE_FOR_EACH_TRACKED(ROW, TABLE) \\
+for ((ROW) = %(s)s_table_track_get_first(TABLE); \\
+ (ROW); \\
+ (ROW) = %(s)s_track_get_next(ROW))
+
+
 /* Returns true if 'row' was inserted since the last change tracking reset. */
 static inline bool %(s)s_is_new(const struct %(s)s *row)
 {
@@ -479,6 +487,14 @@ const struct %(s)s *
 const struct ovsdb_idl *idl = (const struct ovsdb_idl *) table;
 return %(s)s_first(idl);
 }
+
+
+const struct %(s)s *
+%(s)s_table_track_get_first(const struct %(s)s_table *table)
+{
+const struct ovsdb_idl *idl =  (const struct ovsdb_idl *) table;
+return %(s)s_track_get_first(idl);
+}
 ''' % {'s': structName})
 
 # Parse functions.
@@ -655,6 +671,15 @@ const struct %(s)s *
 return %(s)s_cast(ovsdb_idl_get_row_for_uuid(idl, &%(p)stable_%(tl)s, 
uuid));
 }
 
+/* Searches table "%(t)s" for a row with UUID 'uuid'.  Returns
+ * a pointer to the row if there is one, otherwise a null pointer.  */
+const struct %(s)s *
+%(s)s_table_get_for_uuid(const struct %(s)s_table *table, const struct uuid 
*uuid)
+{
+const struct ovsdb_idl *idl = (const struct ovsdb_idl *) table;
+return %(s)s_get_for_uuid(idl, uuid);
+}
+
 /* Returns a row in table "%(t)s" in 'idl', or a null pointer if that
  * table is empty.
  *
-- 
2.1.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC 00/14] ovn-controller incremental processing.

2018-07-24 Thread Han Zhou
This patch series is a rebase of the previous patch series [1] on top of master,
where a series of changes that avoid passing the ovsdb IDL (eaa4ead5) has been
merged. The major concern with the previous patches is the maintainability
of the dependencies. This patch series, thanks to the removal of passing the
IDL directly, eliminates the access to any tables within any engine node
processing, and all dependencies are naturally exposed because otherwise
the code will either fail to compile or abort in the very first iteration,
except for one case - accessing data of other tables through references.

Exposing the dependencies introduced by reference access is a big
TODO item, and it is the major reason this patch series is RFC only.

Other than this, another problem was found while using the ovsdb IDL index
to query chassis: the current ovsdb IDL index is not updated for changes made
in the current transaction. It would be better to fix the index implementation,
although this patch series works around the problem by postponing some
processing to the next iteration in such circumstances.

[1] https://mail.openvswitch.org/pipermail/ovs-dev/2018-June/347808.html

Han Zhou (14):
  ovsdb-idlc.in: Support more interfaces for passing pointers of
individual tables.
  ovn-controller: Incremental processing engine
  ovn-controller: Track OVSDB changes
  ovn-controller: Initial use of incremental engine.
  ovn-controller: Incremental logical flow processing
  ovn-controller: runtime_data change handler for SB port-binding
  ovn-controller: port-binding incremental processing for physical flows
  ovn-controller: incremental processing for multicast group changes
  ovsdb-idl: Tracking - preserve data for deleted rows.
  ovn-controller: Split addr_sets from runtime_data.
  ovn-controller: Maintain resource references for logical flows.
  ovn-controller: Incremental processing for address-set changes.
  ovn-controller: Split port_groups from runtime_data.
  ovn-controller: Incremental processing for port-group changes.

 include/ovn/actions.h   |3 +
 include/ovn/expr.h  |5 +-
 lib/ovsdb-idl-provider.h|2 +
 lib/ovsdb-idl.c |   36 +-
 ovn/controller/bfd.c|4 +-
 ovn/controller/binding.c|  108 ++-
 ovn/controller/binding.h|7 +
 ovn/controller/encaps.c |   12 +-
 ovn/controller/lflow.c  |  428 +++-
 ovn/controller/lflow.h  |  100 ++-
 ovn/controller/ofctrl.c |  262 +---
 ovn/controller/ofctrl.h |   32 +-
 ovn/controller/ovn-controller.c | 1398 +--
 ovn/controller/physical.c   |  232 +--
 ovn/controller/physical.h   |   20 +-
 ovn/lib/actions.c   |8 +-
 ovn/lib/automake.mk |4 +-
 ovn/lib/expr.c  |   21 +-
 ovn/lib/extend-table.c  |   60 +-
 ovn/lib/extend-table.h  |   16 +-
 ovn/lib/inc-proc-eng.c  |  201 ++
 ovn/lib/inc-proc-eng.h  |  240 +++
 ovn/utilities/ovn-trace.c   |2 +-
 ovsdb/ovsdb-idlc.in |   25 +
 tests/ovn.at|   75 +++
 tests/test-ovn.c|7 +-
 26 files changed, 2877 insertions(+), 431 deletions(-)
 create mode 100644 ovn/lib/inc-proc-eng.c
 create mode 100644 ovn/lib/inc-proc-eng.h

-- 
2.1.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netlink-conntrack: undef the correct macro

2018-07-24 Thread Ben Pfaff
On Mon, Jul 23, 2018 at 04:40:49PM -0400, Aaron Conole wrote:
> Fixes: 6830a0c0e6bf ("netlink-conntrack: New module.")
> Cc: Daniele Di Proietto 
> Signed-off-by: Aaron Conole 

Thanks, applied to master.

I didn't backport because I don't think there's any real harm here.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] faq: Add explanation of Signed-off-by.

2018-07-24 Thread Ben Pfaff
Suitable for cutting and pasting into explanatory emails.

Signed-off-by: Ben Pfaff 
---
 Documentation/faq/contributing.rst | 28 
 1 file changed, 28 insertions(+)

diff --git a/Documentation/faq/contributing.rst 
b/Documentation/faq/contributing.rst
index d5226f4f7f7b..cfc9cf7b5035 100644
--- a/Documentation/faq/contributing.rst
+++ b/Documentation/faq/contributing.rst
@@ -93,3 +93,31 @@ Q: How do I add support for a new OpenFlow error message?
 the file for details.  If you need to add an OpenFlow vendor extension
 error for a vendor that doesn't yet have any, first add the vendor ID to
 the ``<name>_VENDOR_ID`` list in ``include/openflow/openflow-common.h``.
+
+Q: What's a Signed-off-by and how do I provide one?
+
+A: Free and open source software projects usually require a contributor to
+provide some assurance that they're entitled to contribute the code that
+they provide.  Some projects, for example, do this with a Contributor
+License Agreement (CLA) or a copyright assignment that is signed on paper
+or electronically.
+
+For this purpose, Open vSwitch has adopted something called the Developer's
+Certificate of Origin (DCO), which is also used by the Linux kernel and
+originated there.  Informally stated, agreeing to the DCO is the
+developer's way of attesting that a particular commit that they are
+contributing is one that they are allowed to contribute.  You should visit
+https://developercertificate.org/ to read the full statement of the DCO,
+which is less than 200 words long.
+
+To certify compliance with the Developer's Certificate of Origin for a
+particular commit, just add the following line to the end of your commit
+message, properly substituting your name and email address:
+
+Signed-off-by: Firstname Lastname <email@address.com>
+
+Git has special support for adding a Signed-off-by line to a commit
+message: when you run "git commit", just add the -s option, as in "git
+commit -s".  If you use the "git citool" GUI for commits, you can add a
+Signed-off-by line to the commit message by pressing Control+S.  Other Git
+user interfaces may provide similar support.
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-discuss] ovsdb-server core dump and ovsdb corruption using raft cluster

2018-07-24 Thread aginwala
Hi:

Glad to see more people picking up on raft testing.

Just to add on, you can also refer to
https://mail.openvswitch.org/pipermail/ovs-dev/2018-May/347765.html and
https://mail.openvswitch.org/pipermail/ovs-dev/2018-April/346375.html  where
there are a couple of suggestions given by Ben too. See if you can skip
snapshot code  and still see the error. However,  the ask to skip snapshot
was to see if the performance would improve for testing purpose. I remember
tuning my VM memory, vcpus ,etc. and never ran into core dump issue again.



Regards,


On Tue, Jul 24, 2018 at 4:41 PM Yifeng Sun  wrote:

> My apologies, the patch has some issues. I need to dig further.
>
> Yifeng
>
> On Tue, Jul 24, 2018 at 1:40 PM, Yifeng Sun 
> wrote:
>
> > Hi Yun and Girish,
> >
> > I submitted a patch, do you mind testing and reviewing it? Thanks.
> >
> > [PATCH] dynamic-string: Fix a bug that leads to assertion fail
> >
> > diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
> > index 6f7b610a9908..4564e420544d 100644
> > --- a/lib/dynamic-string.c
> > +++ b/lib/dynamic-string.c
> > @@ -158,7 +158,7 @@ ds_put_format_valist(struct ds *ds, const char
> > *format, va_list args_)
> >  if (needed < available) {
> >  ds->length += needed;
> >  } else {
> > -ds_reserve(ds, ds->length + needed);
> > +ds_reserve(ds, ds->allocated + needed);
> >
> >  va_copy(args, args_);
> >  available = ds->allocated - ds->length + 1;
> >
> >
> > Thanks,
> > Yifeng Sun
> >
> > On Wed, Jul 18, 2018 at 10:48 AM, Girish Moodalbail <
> gmoodalb...@gmail.com
> > > wrote:
> >
> >> Hello all,
> >>
> >> We are able to reproduce this issue on OVS 2.9.2 at will. The OVSDB NB
> >> server or OVSDB SB server dumps core while it is trying to compact the
> >> database.
> >>
> >> You can reproduce the issue by using:
> >>
> >> root@u1804-HVM-domU:/var/crash# ovs-appctl -t
> >> /var/run/openvswitch/ovnsb_db.ctl ovsdb-server/compact OVN_Southbound
> >>
> >> 2018-07-18T17:34:29Z|1|unixctl|WARN|error communicating with
> >> unix:/var/run/openvswitch/ovnsb_db.ctl: End of file
> >> ovs-appctl: /var/run/openvswitch/ovnsb_db.ctl: transaction error (End of
> >> file)
> >> root@u1804-HVM-domU:/var/crash#
> >> root@u1804-HVM-domU:/var/crash#
> >> root@u1804-HVM-domU:/var/crash# ERROR: apport (pid 17393) Wed Jul 18
> >> 10:34:23 2018: called for pid 14683, signal 6, core limit 0, dump mode 1
> >> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018: executable:
> >> /usr/sbin/ovsdb-server (command line "ovsdb-server -vconsole:off
> >> -vfile:info --log-file=/var/log/openvswitch/ovsdb-server-sb.log
> >> --remote=punix:/var/run/openvswitch/ovnsb_db.sock
> >> --pidfile=/var/run/openvswitch/ovnsb_db.pid --unixctl=ovnsb_db.ctl
> >> --detach
> >> --monitor --remote=db:OVN_Southbound,SB_Global,connections
> >> --private-key=db:OVN_Southbound,SSL,private_key
> >> --certificate=db:OVN_Southbound,SSL,certificate
> >> --ca-cert=db:OVN_Southbound,SSL,ca_cert
> >> --ssl-protocols=db:OVN_Southbound,SSL,ssl_protocols
> >> --ssl-ciphers=db:OVN_Southbound,SSL,ssl_ciphers
> >> --remote=ptcp:6642:10.0.7.33 /etc/openvswitch/ovnsb_db.db")
> >> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018:
> is_closing_session():
> >> no DBUS_SESSION_BUS_ADDRESS in environment
> >> ERROR: apport (pid 17393) Wed Jul 18 10:34:29 2018: wrote report
> >> /var/crash/_usr_sbin_ovsdb-server.0.crash
> >>
> >> Looking through the crash we see the following stack:
> >>
> >> (gdb) bt
> >> #0  __GI_raise (sig=sig@entry=6) at
> ../sysdeps/unix/sysv/linux/raise.c:51
> >> #1  0x7f7c9a43c801 in __GI_abort () at abort.c:79
> >> #2  0x7f7c9aaa633c in json_serialize (json=<optimized out>,
> >> s=<optimized out>) at lib/json.c:1554
> >> #3  0x7f7c9aaa63ab in json_serialize_object_member (i=<optimized out>,
> >> s=<optimized out>, node=<optimized out>, node=<optimized out>)
> >> at lib/json.c:1583
> >> #4  0x7f7c9aaa62f2 in json_serialize_object (s=0x7ffca2173ea0,
> >> object=0x5568dc5d5b10) at lib/json.c:1612
> >> #5  json_serialize (json=<optimized out>, s=0x7ffca2173ea0) at
> >> lib/json.c:1533
> >> #6  0x7f7c9aaa863c in json_to_ds (json=json@entry=0x5568dc5d4a20,
> >> flags=flags@entry=0, ds=ds@entry=0x7ffca2173f30) at lib/json.c:1511
> >> #7  0x7f7c9ae6750f in ovsdb_log_compose_record
> >> (json=json@entry=0x5568dc5d4a20,
> >> magic=0x5568dc5d5a60 "CLUSTER",
> >> header=header@entry=0x7ffca2173f10, data=data@entry=0x7ffca2173f30)
> >> at
> >> ovsdb/log.c:570
> >> #8  0x7f7c9ae677ef in ovsdb_log_write (file=0x5568dc5d5a80,
> >> json=0x5568dc5d4a20) at ovsdb/log.c:618
> >> #9  0x7f7c9ae6796e in ovsdb_log_write_and_free
> >> (log=log@entry=0x5568dc5d5a80,
> >> json=0x5568dc5d4a20) at ovsdb/log.c:651
> >> #10 0x7f7c9ae6d684 in raft_write_snapshot (raft=raft@entry
> >> =0x5568dc1e3720,
> >> log=0x5568dc5d5a80, new_log_start=new_log_start@entry=539578,
> >> new_snapshot=new_snapshot@entry=0x7ffca21740e0) at
> ovsdb/raft.c:3588
> >> #11 0x7f7c9ae6dbf3 in raft_save_snapshot (raft=raft@entry
> >> =0x5568dc1e3720,
> >> 

Re: [ovs-dev] [ovs-discuss] ovsdb-server core dump and ovsdb corruption using raft cluster

2018-07-24 Thread Yifeng Sun
My apologies, the patch has some issues. I need to dig further.

Yifeng

On Tue, Jul 24, 2018 at 1:40 PM, Yifeng Sun  wrote:

> Hi Yun and Girish,
>
> I submitted a patch, do you mind testing and reviewing it? Thanks.
>
> [PATCH] dynamic-string: Fix a bug that leads to assertion fail
>
> diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
> index 6f7b610a9908..4564e420544d 100644
> --- a/lib/dynamic-string.c
> +++ b/lib/dynamic-string.c
> @@ -158,7 +158,7 @@ ds_put_format_valist(struct ds *ds, const char
> *format, va_list args_)
>  if (needed < available) {
>  ds->length += needed;
>  } else {
> -ds_reserve(ds, ds->length + needed);
> +ds_reserve(ds, ds->allocated + needed);
>
>  va_copy(args, args_);
>  available = ds->allocated - ds->length + 1;
>
>
> Thanks,
> Yifeng Sun
>
> On Wed, Jul 18, 2018 at 10:48 AM, Girish Moodalbail  > wrote:
>
>> Hello all,
>>
>> We are able to reproduce this issue on OVS 2.9.2 at will. The OVSDB NB
>> server or OVSDB SB server dumps core while it is trying to compact the
>> database.
>>
>> You can reproduce the issue by using:
>>
>> root@u1804-HVM-domU:/var/crash# ovs-appctl -t
>> /var/run/openvswitch/ovnsb_db.ctl ovsdb-server/compact OVN_Southbound
>>
>> 2018-07-18T17:34:29Z|1|unixctl|WARN|error communicating with
>> unix:/var/run/openvswitch/ovnsb_db.ctl: End of file
>> ovs-appctl: /var/run/openvswitch/ovnsb_db.ctl: transaction error (End of
>> file)
>> root@u1804-HVM-domU:/var/crash#
>> root@u1804-HVM-domU:/var/crash#
>> root@u1804-HVM-domU:/var/crash# ERROR: apport (pid 17393) Wed Jul 18
>> 10:34:23 2018: called for pid 14683, signal 6, core limit 0, dump mode 1
>> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018: executable:
>> /usr/sbin/ovsdb-server (command line "ovsdb-server -vconsole:off
>> -vfile:info --log-file=/var/log/openvswitch/ovsdb-server-sb.log
>> --remote=punix:/var/run/openvswitch/ovnsb_db.sock
>> --pidfile=/var/run/openvswitch/ovnsb_db.pid --unixctl=ovnsb_db.ctl
>> --detach
>> --monitor --remote=db:OVN_Southbound,SB_Global,connections
>> --private-key=db:OVN_Southbound,SSL,private_key
>> --certificate=db:OVN_Southbound,SSL,certificate
>> --ca-cert=db:OVN_Southbound,SSL,ca_cert
>> --ssl-protocols=db:OVN_Southbound,SSL,ssl_protocols
>> --ssl-ciphers=db:OVN_Southbound,SSL,ssl_ciphers
>> --remote=ptcp:6642:10.0.7.33 /etc/openvswitch/ovnsb_db.db")
>> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018: is_closing_session():
>> no DBUS_SESSION_BUS_ADDRESS in environment
>> ERROR: apport (pid 17393) Wed Jul 18 10:34:29 2018: wrote report
>> /var/crash/_usr_sbin_ovsdb-server.0.crash
>>
>> Looking through the crash we see the following stack:
>>
>> (gdb) bt
>> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
>> #1  0x7f7c9a43c801 in __GI_abort () at abort.c:79
>> #2  0x7f7c9aaa633c in json_serialize (json=<optimized out>,
>> s=<optimized out>) at lib/json.c:1554
>> #3  0x7f7c9aaa63ab in json_serialize_object_member (i=<optimized out>,
>> s=<optimized out>, node=<optimized out>, node=<optimized out>)
>> at lib/json.c:1583
>> #4  0x7f7c9aaa62f2 in json_serialize_object (s=0x7ffca2173ea0,
>> object=0x5568dc5d5b10) at lib/json.c:1612
>> #5  json_serialize (json=<optimized out>, s=0x7ffca2173ea0) at
>> lib/json.c:1533
>> #6  0x7f7c9aaa863c in json_to_ds (json=json@entry=0x5568dc5d4a20,
>> flags=flags@entry=0, ds=ds@entry=0x7ffca2173f30) at lib/json.c:1511
>> #7  0x7f7c9ae6750f in ovsdb_log_compose_record
>> (json=json@entry=0x5568dc5d4a20,
>> magic=0x5568dc5d5a60 "CLUSTER",
>> header=header@entry=0x7ffca2173f10, data=data@entry=0x7ffca2173f30)
>> at
>> ovsdb/log.c:570
>> #8  0x7f7c9ae677ef in ovsdb_log_write (file=0x5568dc5d5a80,
>> json=0x5568dc5d4a20) at ovsdb/log.c:618
>> #9  0x7f7c9ae6796e in ovsdb_log_write_and_free
>> (log=log@entry=0x5568dc5d5a80,
>> json=0x5568dc5d4a20) at ovsdb/log.c:651
>> #10 0x7f7c9ae6d684 in raft_write_snapshot (raft=raft@entry
>> =0x5568dc1e3720,
>> log=0x5568dc5d5a80, new_log_start=new_log_start@entry=539578,
>> new_snapshot=new_snapshot@entry=0x7ffca21740e0) at ovsdb/raft.c:3588
>> #11 0x7f7c9ae6dbf3 in raft_save_snapshot (raft=raft@entry
>> =0x5568dc1e3720,
>> new_start=new_start@entry=539578,
>> new_snapshot=new_snapshot@entry=0x7ffca21740e0) at ovsdb/raft.c:3647
>> #12 0x7f7c9ae757bd in raft_store_snapshot (raft=0x5568dc1e3720,
>> new_snapshot_data=new_snapshot_data@entry=0x5568dc5d49a0)
>> at ovsdb/raft.c:3849
>> #13 0x7f7c9ae7c7ae in ovsdb_storage_store_snapshot__
>> (storage=0x5568dc6b2fb0, schema=0x5568dd66f5a0, data=0x5568dca67880)
>> at ovsdb/storage.c:541
>> #14 0x7f7c9ae7d1de in ovsdb_storage_store_snapshot
>> (storage=0x5568dc6b2fb0, schema=schema@entry=0x5568dd66f5a0,
>> data=data@entry=0x5568dca67880) at ovsdb/storage.c:568
>> #15 0x7f7c9ae69cab in ovsdb_snapshot (db=0x5568dc6b3020) at
>> ovsdb/ovsdb.c:519
>> #16 0x5568daec1f82 in main_loop (is_backup=0x7ffca21742be,
>> exiting=0x7ffca21742bf, run_process=0x0, remotes=0x7ffca2174310,
>> 

Re: [ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Ben Pfaff
glibc 2.1 was released in 1999.

On Tue, Jul 24, 2018 at 04:09:00PM -0700, Yifeng Sun wrote:
> On ubuntu 16.04, vsnprintf shows below:
> 
>  The functions snprintf() and vsnprintf() do not write more than size bytes
> (including the ter‐
>minating null byte ('\0')).  If the output was truncated due to this
> limit,  then  the  return
>value  is the number of characters (excluding the terminating null
> byte) which would have been
>written to the final string if enough space had been available.
> Thus, a return value of  size
>or more means that the output was truncated.  (See also below under
> NOTES.)
> 
> The glibc implementation of the functions snprintf() and vsnprintf()
> conforms to the C99 stan‐
>dard,  that  is, behaves as described above, since glibc version
> 2.1.  Until glibc 2.0.6, they
>would return -1 when the output was truncated.
> 
> In this case, we need to check -1. There is definitely a bug here. I will
> create another patch.
> Thanks for the review.
> Yifeng
> 
> On Tue, Jul 24, 2018 at 3:55 PM, Ben Pfaff  wrote:
> 
> > Some pre-ANSI C99 implementations of (v)snprintf() returned -1 if the
> > output was truncated, but C99 and SUSv3 require it to return the number
> > of bytes that would have been written if the buffer was large enough,
> > and for at least the last 10 years or so glibc has implemented it that
> > way.
> >
> > Try building and running this program:
> >
> > #include <stdio.h>
> > int
> > main(void)
> > {
> >char x[9];
> >return snprintf(x, sizeof x, "0123456789");
> > }
> >
> > The exit status is 10, not 1.
> >
> > The glibc manual talks about this:
> >
> >  -- Function: int snprintf (char *S, size_t SIZE, const char *TEMPLATE,
> >   ...)
> >  Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
> >  *Note POSIX Safety Concepts::.
> >
> >  The 'snprintf' function is similar to 'sprintf', except that the
> >  SIZE argument specifies the maximum number of characters to
> >  produce.  The trailing null character is counted towards this
> >  limit, so you should allocate at least SIZE characters for the
> >  string S.  If SIZE is zero, nothing, not even the null byte, shall
> >  be written and S may be a null pointer.
> >
> >  The return value is the number of characters which would be
> >  generated for the given input, excluding the trailing null.  If
> >  this value is greater or equal to SIZE, not all characters from the
> >  result have been stored in S.  You should try again with a bigger
> >  output string.  Here is an example of doing this:
> >
> >   /* Construct a message describing the value of a variable
> >  whose name is NAME and whose value is VALUE. */
> >   char *
> >   make_message (char *name, char *value)
> >   {
> > /* Guess we need no more than 100 chars of space. */
> > int size = 100;
> > char *buffer = (char *) xmalloc (size);
> > int nchars;
> > if (buffer == NULL)
> >   return NULL;
> >
> >/* Try to print in the allocated space. */
> > nchars = snprintf (buffer, size, "value of %s is %s",
> >  name, value);
> > if (nchars >= size)
> >   {
> > /* Reallocate buffer now that we know
> >  how much space is needed. */
> > size = nchars + 1;
> > buffer = (char *) xrealloc (buffer, size);
> >
> > if (buffer != NULL)
> > /* Try again. */
> > snprintf (buffer, size, "value of %s is %s",
> >   name, value);
> >   }
> > /* The last call worked, return the string. */
> > return buffer;
> >   }
> >
> >  In practice, it is often easier just to use 'asprintf', below.
> >
> >  *Attention:* In versions of the GNU C Library prior to 2.1 the
> >  return value is the number of characters stored, not including the
> >  terminating null; unless there was not enough space in S to store
> >  the result in which case '-1' is returned.  This was changed in
> >  order to comply with the ISO C99 standard.
> >
> > On Tue, Jul 24, 2018 at 03:31:12PM -0700, Yifeng Sun wrote:
> > > Hi Ben,
> > >
> > > vsnprintf returns the size that was truncated. So we need at least
> > > ds->allocated + needed bytes to print the full string.
> > > needed = vsnprintf(&ds->string[ds->length], available, format,
> > > args);
> > >
> > > So ds_reserve should make sure ds contains at least ds->allocated +
> > needed
> > > bytes.
> > > ds_reserve(ds, ds->allocated + needed);
> > >
> > > For example, if ds starts with:
> > > length = 4, allocated = 8
> > > Assume the to-be-printed string length = 10, then we got needed = 2
> > > In current code, 

Re: [ovs-dev] [PATCH 1/2] Prepare for 2.10.0.

2018-07-24 Thread Justin Pettit


> On Jul 24, 2018, at 3:56 PM, Ben Pfaff  wrote:
> 
> On Tue, Jul 24, 2018 at 03:35:20PM -0700, Justin Pettit wrote:
>> Signed-off-by: Justin Pettit 
> 
> Acked-by: Ben Pfaff 

Thanks.  I pushed the series to master, and created "branch-2.10" based on this 
commit.

--Justin


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 1/1] Permit to build OVS with only Python3 installed

2018-07-24 Thread Ben Pfaff
On Fri, Jun 22, 2018 at 08:04:10PM +0200, Timothy Redaelli wrote:
> This commit renames HAVE_PYTHON to HAVE_PYTHON2 and PYTHON to PYTHON2
> and adds HAVE_PYTHON and PYTHON with a different semantics:
> - If PYTHON environment variable is set, use it as PYTHON
> - If a python2 interpreter is available, PYTHON becomes the python2 interpreter
> - If a python3 interpreter is available, PYTHON becomes the python3 interpreter
> 
> PYTHON is only used to run the python scripts needed by the build system
> 
> NOTE:
> Since currently most of the utilities and bugtool don't support Python3,
> they're installed only if python2 is available. This will be fixed in later
> commits.
> 
> Signed-off-by: Timothy Redaelli 

Applied to master, thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Yifeng Sun
On ubuntu 16.04, vsnprintf shows below:

 The functions snprintf() and vsnprintf() do not write more than size bytes
(including the ter‐
   minating null byte ('\0')).  If the output was truncated due to this
limit,  then  the  return
   value  is the number of characters (excluding the terminating null
byte) which would have been
   written to the final string if enough space had been available.
Thus, a return value of  size
   or more means that the output was truncated.  (See also below under
NOTES.)

The glibc implementation of the functions snprintf() and vsnprintf()
conforms to the C99 stan‐
   dard,  that  is, behaves as described above, since glibc version
2.1.  Until glibc 2.0.6, they
   would return -1 when the output was truncated.

In this case, we need to check -1. There is definitely a bug here. I will
create another patch.
Thanks for the review.
Yifeng

On Tue, Jul 24, 2018 at 3:55 PM, Ben Pfaff  wrote:

> Some pre-ANSI C99 implementations of (v)snprintf() returned -1 if the
> output was truncated, but C99 and SUSv3 require it to return the number
> of bytes that would have been written if the buffer was large enough,
> and for at least the last 10 years or so glibc has implemented it that
> way.
>
> Try building and running this program:
>
> #include <stdio.h>
> int
> main(void)
> {
>char x[9];
>return snprintf(x, sizeof x, "0123456789");
> }
>
> The exit status is 10, not 1.
>
> The glibc manual talks about this:
>
>  -- Function: int snprintf (char *S, size_t SIZE, const char *TEMPLATE,
>   ...)
>  Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
>  *Note POSIX Safety Concepts::.
>
>  The 'snprintf' function is similar to 'sprintf', except that the
>  SIZE argument specifies the maximum number of characters to
>  produce.  The trailing null character is counted towards this
>  limit, so you should allocate at least SIZE characters for the
>  string S.  If SIZE is zero, nothing, not even the null byte, shall
>  be written and S may be a null pointer.
>
>  The return value is the number of characters which would be
>  generated for the given input, excluding the trailing null.  If
>  this value is greater or equal to SIZE, not all characters from the
>  result have been stored in S.  You should try again with a bigger
>  output string.  Here is an example of doing this:
>
>   /* Construct a message describing the value of a variable
>  whose name is NAME and whose value is VALUE. */
>   char *
>   make_message (char *name, char *value)
>   {
> /* Guess we need no more than 100 chars of space. */
> int size = 100;
> char *buffer = (char *) xmalloc (size);
> int nchars;
> if (buffer == NULL)
>   return NULL;
>
>/* Try to print in the allocated space. */
> nchars = snprintf (buffer, size, "value of %s is %s",
>  name, value);
> if (nchars >= size)
>   {
> /* Reallocate buffer now that we know
>  how much space is needed. */
> size = nchars + 1;
> buffer = (char *) xrealloc (buffer, size);
>
> if (buffer != NULL)
> /* Try again. */
> snprintf (buffer, size, "value of %s is %s",
>   name, value);
>   }
> /* The last call worked, return the string. */
> return buffer;
>   }
>
>  In practice, it is often easier just to use 'asprintf', below.
>
>  *Attention:* In versions of the GNU C Library prior to 2.1 the
>  return value is the number of characters stored, not including the
>  terminating null; unless there was not enough space in S to store
>  the result in which case '-1' is returned.  This was changed in
>  order to comply with the ISO C99 standard.
>
> On Tue, Jul 24, 2018 at 03:31:12PM -0700, Yifeng Sun wrote:
> > Hi Ben,
> >
> > vsnprintf returns the size that was truncated. So we need at least
> > ds->allocated + needed bytes to print the full string.
> > needed = vsnprintf(&ds->string[ds->length], available, format,
> > args);
> >
> > So ds_reserve should make sure ds contains at least ds->allocated +
> needed
> > bytes.
> > ds_reserve(ds, ds->allocated + needed);
> >
> > For example, if ds starts with:
> > length = 4, allocated = 8
> > Assume the to-be-printed string length = 10, then we got needed = 2
> > In current code, ds_reserve(4 + 2 = 6) is called, if go into
> ds_reserve(),
> > since (6 < 8), ds_reserve actually does nothing.
> >
> > Thanks,
> > Yifeng
> >
> > On Tue, Jul 24, 2018 at 2:47 PM, Ben Pfaff  wrote:
> >
> > > On Tue, Jul 24, 2018 at 08:37:08AM -0700, Yifeng Sun wrote:
> > > > 'needed' should be size of needed memory space beyond allocated.
> 

Re: [ovs-dev] [PATCH] ovs-tcpundump: Fix incompatibilities with python3

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 06:35:22PM +0200, Timothy Redaelli wrote:
> Added parentheses after print and used "as" instead of "," in except.
> 
> This commit fixes also a couple of flake8 warnings:
> 
> utilities/ovs-tcpundump:23:1: E302 expected 2 blank lines, found 1
> utilities/ovs-tcpundump:35:1: E305 expected 2 blank lines after class or
> function definition, found 1
> 
> Tested on Python 2.7.15 and Python 3.6.5
> 
> Signed-off-by: Timothy Redaelli 

Applied to master, thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ovs-tcpdump: Fix incompatibilities with python3

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 06:35:13PM +0200, Timothy Redaelli wrote:
> Opening a file with 'rw' in Python3 returns an error, moreover using 'rw' in
> Python2 is wrong too since it opens the file using O_RDONLY and not by using
> O_RDWR.
> 
> This commit fixes it by using the low-level os.open function with O_RDWR
> as suggested by the Linux kernel (tuntap.txt) documentation.
> 
> This commit fixes also some usual bytes vs string incompatibilities.
> 
> Tested on Python 2.7.15 and Python 3.6.5
> 
> Signed-off-by: Timothy Redaelli 

Applied to master, thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/5] netdev-dpdk: Fix sparse complaints.

2018-07-24 Thread Ben Pfaff
It's because htons() and ntohs() actually do the same thing (swap
bytes).

On Mon, Jul 16, 2018 at 11:06:56AM -0700, Yifeng Sun wrote:
> Thanks for the fix. I am wondering why there was no running issue when
> dl_type is compared with wrong byte order.
> 
> Reviewed-by: Yifeng Sun 
> 
> On Thu, Jul 12, 2018 at 2:55 PM, Ben Pfaff  wrote:
> 
> > Neither of these is a real problem.
> >
> > Signed-off-by: Ben Pfaff 
> > ---
> >  lib/netdev-dpdk.c | 6 +++---
> >  1 file changed, 3 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> > index b4ed4ad5919c..d485a53dacf1 100644
> > --- a/lib/netdev-dpdk.c
> > +++ b/lib/netdev-dpdk.c
> > @@ -4384,7 +4384,7 @@ netdev_dpdk_add_rte_flow_offload(struct netdev
> > *netdev,
> >  struct rte_flow_item_ipv4 ipv4_mask;
> > >  memset(&ipv4_spec, 0, sizeof(ipv4_spec));
> > >  memset(&ipv4_mask, 0, sizeof(ipv4_mask));
> > -if (match->flow.dl_type == ntohs(ETH_TYPE_IP)) {
> > +if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
> >
> >  ipv4_spec.hdr.type_of_service = match->flow.nw_tos;
> >  ipv4_spec.hdr.time_to_live= match->flow.nw_ttl;
> > @@ -4419,8 +4419,8 @@ netdev_dpdk_add_rte_flow_offload(struct netdev
> > *netdev,
> >  goto out;
> >  }
> >
> > > -if ((match->wc.masks.tp_src && match->wc.masks.tp_src != 0xffff) ||
> > > -(match->wc.masks.tp_dst && match->wc.masks.tp_dst != 0xffff)) {
> > +if ((match->wc.masks.tp_src && match->wc.masks.tp_src !=
> > OVS_BE16_MAX) ||
> > +(match->wc.masks.tp_dst && match->wc.masks.tp_dst !=
> > OVS_BE16_MAX)) {
> >  ret = -1;
> >  goto out;
> >  }
> > --
> > 2.16.1
> >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] Prepare for post-2.10.0 (2.10.90).

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 03:35:21PM -0700, Justin Pettit wrote:
> Signed-off-by: Justin Pettit 

Acked-by: Ben Pfaff 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Ben Pfaff
Some pre-ANSI C99 implementations of (v)snprintf() returned -1 if the
output was truncated, but C99 and SUSv3 require it to return the number
of bytes that would have been written if the buffer was large enough,
and for at least the last 10 years or so glibc has implemented it that
way.

Try building and running this program:

#include <stdio.h>
int
main(void)
{
   char x[9];
   return snprintf(x, sizeof x, "0123456789");
}

The exit status is 10, not 1.

The glibc manual talks about this:

 -- Function: int snprintf (char *S, size_t SIZE, const char *TEMPLATE,
  ...)
 Preliminary: | MT-Safe locale | AS-Unsafe heap | AC-Unsafe mem |
 *Note POSIX Safety Concepts::.

 The 'snprintf' function is similar to 'sprintf', except that the
 SIZE argument specifies the maximum number of characters to
 produce.  The trailing null character is counted towards this
 limit, so you should allocate at least SIZE characters for the
 string S.  If SIZE is zero, nothing, not even the null byte, shall
 be written and S may be a null pointer.

 The return value is the number of characters which would be
 generated for the given input, excluding the trailing null.  If
 this value is greater or equal to SIZE, not all characters from the
 result have been stored in S.  You should try again with a bigger
 output string.  Here is an example of doing this:

  /* Construct a message describing the value of a variable
 whose name is NAME and whose value is VALUE. */
  char *
  make_message (char *name, char *value)
  {
/* Guess we need no more than 100 chars of space. */
int size = 100;
char *buffer = (char *) xmalloc (size);
int nchars;
if (buffer == NULL)
  return NULL;

   /* Try to print in the allocated space. */
nchars = snprintf (buffer, size, "value of %s is %s",
 name, value);
if (nchars >= size)
  {
/* Reallocate buffer now that we know
 how much space is needed. */
size = nchars + 1;
buffer = (char *) xrealloc (buffer, size);

if (buffer != NULL)
/* Try again. */
snprintf (buffer, size, "value of %s is %s",
  name, value);
  }
/* The last call worked, return the string. */
return buffer;
  }

 In practice, it is often easier just to use 'asprintf', below.

 *Attention:* In versions of the GNU C Library prior to 2.1 the
 return value is the number of characters stored, not including the
 terminating null; unless there was not enough space in S to store
 the result in which case '-1' is returned.  This was changed in
 order to comply with the ISO C99 standard.

On Tue, Jul 24, 2018 at 03:31:12PM -0700, Yifeng Sun wrote:
> Hi Ben,
> 
> vsnprintf returns the size that was truncated. So we need at least
> ds->allocated + needed bytes to print the full string.
> needed = vsnprintf(&ds->string[ds->length], available, format,
> args);
> 
> So ds_reserve should make sure ds contains at least ds->allocated + needed
> bytes.
> ds_reserve(ds, ds->allocated + needed);
> 
> For example, if ds starts with:
> length = 4, allocated = 8
> Assume the to-be-printed string length = 10, then we got needed = 2
> In current code, ds_reserve(4 + 2 = 6) is called, if go into ds_reserve(),
> since (6 < 8), ds_reserve actually does nothing.
> 
> Thanks,
> Yifeng
> 
> On Tue, Jul 24, 2018 at 2:47 PM, Ben Pfaff  wrote:
> 
> > On Tue, Jul 24, 2018 at 08:37:08AM -0700, Yifeng Sun wrote:
> > > 'needed' should be size of needed memory space beyond allocated.
> > >
> > > Signed-off-by: Yifeng Sun 
> > > Reported-by: Yun Zhou 
> > > Reported-by: Girish Moodalbail 
> >
> > I don't see a bug here.  Can you explain why you think that there is a
> > bug?
> >
> > (I note that this code dates back to before 2008.)
> >
> > Thanks,
> >
> > Ben.
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] Prepare for 2.10.0.

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 03:35:20PM -0700, Justin Pettit wrote:
> Signed-off-by: Justin Pettit 

Acked-by: Ben Pfaff 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
> On Tue, Jul 24, 2018 at 10:01:17PM +0000, Stokes, Ian wrote:
> > Hi Ben,
> >
> > The following changes since commit
> 1ac690899592f97520aa1c959a623175e642f0a4:
> >
> >   NEWS: Mention daemon mode support for ovn-nbctl. (2018-07-24 09:14:28
> -0700)
> >
> > are available in the git repository at:
> >
> >   https://github.com/istokes/ovs dpdk_merge
> >
> > for you to fetch changes up to 7a2ce387d8174b4b57133eb1f3689aa833e911c5:
> >
> >   sparse: Add support for DPDK. (2018-07-24 22:36:56 +0100)
> >
> > 
> > Ben Pfaff (4):
> >   netdev-dpdk: Fix incorrect byte order conversion in log message.
> >   netdev-dpdk: Fix sparse complaints.
> >   netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it.
> >   sparse: Add support for DPDK.
> >
> > Ian Stokes (1):
> >   Docs: Improve OVS DPDK version mapping notice.
> >
> > Yipeng Wang (1):
> >   dpif-netdev: Add SMC cache after EMC cache
> 
> Thanks, applied to master.

Thanks Ben, much appreciated.

Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 10:01:17PM +0000, Stokes, Ian wrote:
> Hi Ben,
> 
> The following changes since commit 1ac690899592f97520aa1c959a623175e642f0a4:
> 
>   NEWS: Mention daemon mode support for ovn-nbctl. (2018-07-24 09:14:28 -0700)
> 
> are available in the git repository at:
> 
>   https://github.com/istokes/ovs dpdk_merge
> 
> for you to fetch changes up to 7a2ce387d8174b4b57133eb1f3689aa833e911c5:
> 
>   sparse: Add support for DPDK. (2018-07-24 22:36:56 +0100)
> 
> 
> Ben Pfaff (4):
>   netdev-dpdk: Fix incorrect byte order conversion in log message.
>   netdev-dpdk: Fix sparse complaints.
>   netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it.
>   sparse: Add support for DPDK.
> 
> Ian Stokes (1):
>   Docs: Improve OVS DPDK version mapping notice.
> 
> Yipeng Wang (1):
>   dpif-netdev: Add SMC cache after EMC cache

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] Prepare for 2.10.0.

2018-07-24 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 NEWS | 4 ++--
 configure.ac | 2 +-
 debian/changelog | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/NEWS b/NEWS
index 2414b9199ae0..b0aaf88bbfb4 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,5 @@
-Post-v2.9.0
-
+v2.10.0 - xx xxx 
+-
- ovs-vswitchd and utilities now support DNS names in OpenFlow and
  OVSDB remotes.
- ovs-vswitchd:
diff --git a/configure.ac b/configure.ac
index c89c607c7124..009e5c98ef0f 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.9.90, b...@openvswitch.org)
+AC_INIT(openvswitch, 2.10.0, b...@openvswitch.org)
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/debian/changelog b/debian/changelog
index 2881d332fbd6..b4174c9f393f 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,8 +1,8 @@
-openvswitch (2.9.90-1) unstable; urgency=low
+openvswitch (2.10.0-1) unstable; urgency=low
 
* New upstream version
 
- -- Open vSwitch team   Wed, 17 Jan 2018 09:52:17 -0700
+ -- Open vSwitch team   Tue, 24 Jul 2018 15:27:38 -0700
 
 openvswitch (2.9.0-1) unstable; urgency=low
 
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] Prepare for post-2.10.0 (2.10.90).

2018-07-24 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 NEWS | 4 
 configure.ac | 2 +-
 debian/changelog | 6 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index b0aaf88bbfb4..61026bc9c223 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+Post-v2.10.0
+-
+
+
 v2.10.0 - xx xxx 
 -
- ovs-vswitchd and utilities now support DNS names in OpenFlow and
diff --git a/configure.ac b/configure.ac
index 009e5c98ef0f..8390cf484f29 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.10.0, b...@openvswitch.org)
+AC_INIT(openvswitch, 2.10.90, b...@openvswitch.org)
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/debian/changelog b/debian/changelog
index b4174c9f393f..1d74bb2a299c 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+openvswitch (2.10.90-1) unstable; urgency=low
+
+   * New upstream version
+
+ -- Open vSwitch team   Tue, 24 Jul 2018 15:29:17 -0700
+
 openvswitch (2.10.0-1) unstable; urgency=low
 
* New upstream version
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Yifeng Sun
Hi Ben,

vsnprintf returns the size that was truncated. So we need at least
ds->allocated + needed bytes to print the full string.
needed = vsnprintf(&ds->string[ds->length], available, format,
args);

So ds_reserve should make sure ds contains at least ds->allocated + needed
bytes.
ds_reserve(ds, ds->allocated + needed);

For example, suppose ds starts with:
length = 4, allocated = 8
Assume the to-be-printed string has length = 10; then we get needed = 2.
In the current code, ds_reserve(4 + 2 = 6) is called, and inside ds_reserve(),
since 6 < 8, nothing is actually done.

Thanks,
Yifeng

On Tue, Jul 24, 2018 at 2:47 PM, Ben Pfaff  wrote:

> On Tue, Jul 24, 2018 at 08:37:08AM -0700, Yifeng Sun wrote:
> > 'needed' should be size of needed memory space beyond allocated.
> >
> > Signed-off-by: Yifeng Sun 
> > Reported-by: Yun Zhou 
> > Reported-by: Girish Moodalbail 
>
> I don't see a bug here.  Can you explain why you think that there is a
> bug?
>
> (I note that this code dates back to before 2008.)
>
> Thanks,
>
> Ben.
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] datapath: add transport ports in route lookup to enable IPsec policy match.

2018-07-24 Thread Gregory Rose

On 6/18/2018 11:14 AM, Qiuyu Xiao wrote:

This patch adds transport ports information for route lookup so that IPsec
can select tunnel traffic (geneve, stt, vxlan) to do encryption.

The patch was tested for geneve, stt, and vxlan tunnel and the results
show that IPsec policy can be set to only match the corresponding tunnel
traffic.

Signed-off-by: Qiuyu Xiao 


LGTM

I did some minimal regression testing with make check and make check-kmod.

Qiuyu,

Normally we push changes upstream first and then backport to the
out-of-tree repo here on GitHub.  However, in this case, since there is
no upstream stt driver, I guess we can do things a bit differently.

You'll need to pull out the changes for vxlan and geneve and create a
patch against those driver modules for upstream Linux.  If needed,
contact me via direct email for some help on how to do that.

There is no upstream stt support so you can drop that part of the patch.

Thanks!

Reviewed-by: Greg Rose 
Tested-by: Greg Rose 


---
  datapath/linux/compat/geneve.c | 29 +++--
  datapath/linux/compat/stt.c| 15 ++-
  datapath/linux/compat/vxlan.c  | 14 --
  3 files changed, 41 insertions(+), 17 deletions(-)

diff --git a/datapath/linux/compat/geneve.c b/datapath/linux/compat/geneve.c
index 435a23f..95a665d 100644
--- a/datapath/linux/compat/geneve.c
+++ b/datapath/linux/compat/geneve.c
@@ -836,7 +836,8 @@ free_dst:
  static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
   struct net_device *dev,
   struct flowi4 *fl4,
-  struct ip_tunnel_info *info)
+  struct ip_tunnel_info *info,
+  __be16 dport, __be16 sport)
  {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
@@ -850,6 +851,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
memset(fl4, 0, sizeof(*fl4));
fl4->flowi4_mark = skb->mark;
fl4->flowi4_proto = IPPROTO_UDP;
+   fl4->fl4_dport = dport;
+   fl4->fl4_sport = sport;
  
  	if (info) {

fl4->daddr = info->key.u.ipv4.dst;
@@ -895,7 +898,8 @@ static struct rtable *geneve_get_v4_rt(struct sk_buff *skb,
  static struct dst_entry *geneve_get_v6_dst(struct sk_buff *skb,
   struct net_device *dev,
   struct flowi6 *fl6,
-  struct ip_tunnel_info *info)
+  struct ip_tunnel_info *info,
+ __be16 dport, __be16 sport)
  {
bool use_cache = ip_tunnel_dst_cache_usable(skb, info);
struct geneve_dev *geneve = netdev_priv(dev);
@@ -911,6 +915,8 @@ static struct dst_entry *geneve_get_v6_dst(struct sk_buff 
*skb,
memset(fl6, 0, sizeof(*fl6));
fl6->flowi6_mark = skb->mark;
fl6->flowi6_proto = IPPROTO_UDP;
+   fl6->fl6_dport = dport;
+   fl6->fl6_sport = sport;
  
  	if (info) {

fl6->daddr = info->key.u.ipv6.dst;
@@ -1005,13 +1011,13 @@ static netdev_tx_t geneve_xmit_skb(struct sk_buff *skb, 
struct net_device *dev,
goto tx_error;
}
  
-	rt = geneve_get_v4_rt(skb, dev, , info);

+   sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+   rt = geneve_get_v4_rt(skb, dev, , info, geneve->dst_port, sport);
if (IS_ERR(rt)) {
err = PTR_ERR(rt);
goto tx_error;
}
  
-	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);

skb_reset_mac_header(skb);
  
  	iip = ip_hdr(skb);

@@ -1097,13 +1103,13 @@ static netdev_tx_t geneve6_xmit_skb(struct sk_buff 
*skb, struct net_device *dev,
}
}
  
-	dst = geneve_get_v6_dst(skb, dev, , info);

+   sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
+   dst = geneve_get_v6_dst(skb, dev, , info, geneve->dst_port, sport);
if (IS_ERR(dst)) {
err = PTR_ERR(dst);
goto tx_error;
}
  
-	sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);

skb_reset_mac_header(skb);
  
  	iip = ip_hdr(skb);

@@ -1232,13 +1238,17 @@ int ovs_geneve_fill_metadata_dst(struct net_device 
*dev, struct sk_buff *skb)
struct geneve_dev *geneve = netdev_priv(dev);
struct rtable *rt;
struct flowi4 fl4;
+   __be16 sport;
  #if IS_ENABLED(CONFIG_IPV6)
struct dst_entry *dst;
struct flowi6 fl6;
  #endif
  
+	sport = udp_flow_src_port(geneve->net, skb,

+1, USHRT_MAX, true);
+
if (ip_tunnel_info_af(info) == AF_INET) {
-   rt = geneve_get_v4_rt(skb, dev, , info);
+   rt = geneve_get_v4_rt(skb, dev, , info, 

[ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
Hi Ben,

The following changes since commit 1ac690899592f97520aa1c959a623175e642f0a4:

  NEWS: Mention daemon mode support for ovn-nbctl. (2018-07-24 09:14:28 -0700)

are available in the git repository at:

  https://github.com/istokes/ovs dpdk_merge

for you to fetch changes up to 7a2ce387d8174b4b57133eb1f3689aa833e911c5:

  sparse: Add support for DPDK. (2018-07-24 22:36:56 +0100)


Ben Pfaff (4):
  netdev-dpdk: Fix incorrect byte order conversion in log message.
  netdev-dpdk: Fix sparse complaints.
  netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it.
  sparse: Add support for DPDK.

Ian Stokes (1):
  Docs: Improve OVS DPDK version mapping notice.

Yipeng Wang (1):
  dpif-netdev: Add SMC cache after EMC cache

 Documentation/howto/dpdk.rst |6 +-
 Documentation/intro/install/dpdk.rst |6 +-
 Documentation/topics/dpdk/bridge.rst |   15 ++
 Makefile.am  |2 +-
 NEWS |2 +
 build-aux/initial-tab-whitelist  |1 +
 include/sparse/automake.mk   |9 ++
 include/sparse/rte_byteorder.h   |  281 
+
 include/sparse/rte_esp.h |   65 +
 include/sparse/rte_flow.h| 1483 
+++
 include/sparse/rte_icmp.h|  106 ++
 include/sparse/rte_ip.h  |  490 
+++
 include/sparse/rte_sctp.h|  103 ++
 include/sparse/rte_tcp.h |  108 ++
 include/sparse/rte_udp.h |  103 ++
 include/sparse/xmmintrin.h   |   24 
 lib/cmap.c   |   74 ++
 lib/cmap.h   |   11 ++
 lib/dpif-netdev-perf.h   |1 +
 lib/dpif-netdev.c|  329 
++-
 lib/netdev-dpdk.c|   26 ++--
 tests/pmd.at |7 +-
 vswitchd/vswitch.xml |   13 ++
 23 files changed, 3204 insertions(+), 61 deletions(-)
 create mode 100644 include/sparse/rte_byteorder.h
 create mode 100644 include/sparse/rte_esp.h
 create mode 100644 include/sparse/rte_flow.h
 create mode 100644 include/sparse/rte_icmp.h
 create mode 100644 include/sparse/rte_ip.h
 create mode 100644 include/sparse/rte_sctp.h
 create mode 100644 include/sparse/rte_tcp.h
 create mode 100644 include/sparse/rte_udp.h
 create mode 100644 include/sparse/xmmintrin.h

Thanks
Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Stokes, Ian
> On Tue, Jul 24, 2018 at 06:20:04PM +, Stokes, Ian wrote:
> > > On Tue, Jul 24, 2018 at 07:55:39PM +0300, Ilya Maximets wrote:
> > > > Hi.
> > > > Just wanted to add some comments for the use-cases and testing
> > > methodology.
> > > > See inline.
> > > >
> > > > And I'm actually not sure if there any profit from this patch set?
> > > > It looks like an internal mbuf handling rework that only degrades
> > > > the performance and complicates the code.
> > > >
> > > > Please, don't consider me as merge blocker. I just want to
> > > > understand why you think we need this 1200 LOCs?
> > > >
> > > > ---
> > > > About 'resize()' related discussion:
> > > > Maybe it's worth to allow dp_packet APIs to return different
> dp_packet.
> > > > In this case we'll be able to just clone the packet to malloced
> > > > memory and resize in cases of not enough headroom available.
> > > > Like:
> > > > packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); or
> > > > eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);
> > > >
> > > > This will have a little performance penalty in compare with data
> > > > shifting inside the mbuf, but will be much more elegant, and will
> > > > allow to eliminate all the OVS_NOT_REACHED cases.
> > > >
> > > >
> > > > Best regards, Ilya Maximets.
> > >
> > > Hmm, this is the second person from whom I've heard serious
> > > misgivings about this patch series.  Tiago, Ian, would you like to
> > > respond?  I'm a little nervous about merging this patch series,
> > > especially relatively late before branching, given that some people
> have technical objections to it.
> > >
> >
> > I think Tiago is currently responding to the queries Ilya has raised but
> I'd like to respond to this being included in the 2.10 release
> specifically.
> >
> > The feature has been in development for a while and up until Tiago had
> taken over the work 2 months had received little feedback.
> >
> > That being said we have had a number of people testing the feature over
> the last month+ who are familiar with OVS DPDK and have since signed off
> on it as it is enabling a common customer requirement for TSO/GRO
> offloads.
> >
> > This is just a stepping stone to TSO and GRO enablement for OVS DPDK,
> which is called out as a feature gap between kernel OVS and userspace OVS.
> The work will be continued from our side, aiming for OVS 2.11, so
> any changes required can be refined.
> >
> > From a validation POV it's not breaking anything existing for OVS DPDK
> that I have come across, so the risk on that side is low.
> >
> > Would it be preferred to hold off merging this until the TSO/GRO aspects
> have been implemented also? I guess my view here was that I would like to
> enable users to begin using the multiseg feature with the 2.10 release and
> we can refine our approach if needed based on the community feedback.
> >
> > There are some interesting discussions such as changes to the dp_packet
> API above, but it's a pity it comes at a critical time window. I wonder
> are the API changes separate to this work? Impact would be felt outside of
> the multiseg also I would think as they are APIs. If there are changes
> required to the APIs I would envision we would update the multiseg feature
> to work with these changes as they are introduced in the future.
> >
> > If you feel this is too much of a risk we can remove it from 2.10. I can
> respin a new pull request with the remaining patches.
> 
> After some consideration, I think we should leave this out of 2.10.  I'm
> happy to merge it into master post-branch.  That way it'll have about 6
> months to cook on master and possibly acquire these additional benefits
> you mention.
> 
> Would you please respin the PR to leave this out?

No problem, I'll respin a new pull request now.

Thanks
Ian

> 
> Thanks,
> 
> Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 08:37:08AM -0700, Yifeng Sun wrote:
> 'needed' should be size of needed memory space beyond allocated.
> 
> Signed-off-by: Yifeng Sun 
> Reported-by: Yun Zhou 
> Reported-by: Girish Moodalbail 

I don't see a bug here.  Can you explain why you think that there is a
bug?

(I note that this code dates back to before 2008.)

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 06:20:04PM +, Stokes, Ian wrote:
> > On Tue, Jul 24, 2018 at 07:55:39PM +0300, Ilya Maximets wrote:
> > > Hi.
> > > Just wanted to add some comments for the use-cases and testing
> > methodology.
> > > See inline.
> > >
> > > And I'm actually not sure if there any profit from this patch set?
> > > It looks like an internal mbuf handling rework that only degrades the
> > > performance and complicates the code.
> > >
> > > Please, don't consider me as merge blocker. I just want to understand
> > > why you think we need this 1200 LOCs?
> > >
> > > ---
> > > About 'resize()' related discussion:
> > > Maybe it's worth to allow dp_packet APIs to return different dp_packet.
> > > In this case we'll be able to just clone the packet to malloced memory
> > > and resize in cases of not enough headroom available.
> > > Like:
> > >   packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); or
> > >   eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);
> > >
> > > This will have a little performance penalty in compare with data
> > > shifting inside the mbuf, but will be much more elegant, and will
> > > allow to eliminate all the OVS_NOT_REACHED cases.
> > >
> > >
> > > Best regards, Ilya Maximets.
> > 
> > Hmm, this is the second person from whom I've heard serious misgivings
> > about this patch series.  Tiago, Ian, would you like to respond?  I'm a
> > little nervous about merging this patch series, especially relatively late
> > before branching, given that some people have technical objections to it.
> > 
> 
> I think Tiago is currently responding to the queries Ilya has raised but I'd 
> like to respond to this being included in the 2.10 release specifically.
> 
> The feature has been in development for a while and up until Tiago had taken 
> over the work 2 months had received little feedback.
> 
> That being said we have had a number of people testing the feature over the 
> last month+ who are familiar with OVS DPDK and have since signed off on it as 
> it is enabling a common customer requirement for TSO/GRO offloads.
> 
> This is just a stepping stone to TSO and GRO enablement for OVS DPDK, which is
> called out as a feature gap between kernel OVS and userspace OVS. The work
> will be continued from our side, aiming for OVS 2.11, so any changes
> required can be refined.
> 
> From a validation POV it's not breaking anything existing for OVS DPDK that
> I have come across, so the risk on that side is low.
> 
> Would it be preferred to hold off merging this until the TSO/GRO aspects have 
> been implemented also? I guess my view here was that I would like to enable 
> users to begin using the multiseg feature with the 2.10 release and we can 
> refine our approach if needed based on the community feedback.
> 
> There are some interesting discussions such as changes to the dp_packet API 
> above, but it's a pity it comes at a critical time window. I wonder are the 
> API changes separate to this work? Impact would be felt outside of the 
> multiseg also I would think as they are APIs. If there are changes required 
> to the APIs I would envision we would update the multiseg feature to work 
> with these changes as they are introduced in the future.
> 
> If you feel this is too much of a risk we can remove it from 2.10. I can 
> respin a new pull request with the remaining patches.

After some consideration, I think we should leave this out of 2.10.  I'm
happy to merge it into master post-branch.  That way it'll have about 6
months to cook on master and possibly acquire these additional benefits
you mention.

Would you please respin the PR to leave this out?

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ofproto-dpif: Fix typo in registered command

2018-07-24 Thread Ben Pfaff
On Mon, Jul 23, 2018 at 05:45:30PM +0300, Alin Gabriel Serdean wrote:
> Also split line at 79 characters.
> 
> Found by inspection.
> 
> Signed-off-by: Alin Gabriel Serdean 

Acked-by: Ben Pfaff 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Ben Pfaff
Hi Ian and Tiago.

Commit 0e75dd821656 ("dpdk-tests: Add uni-tests for multi-seg mbufs.")
in this pull introduces the following warning from automake:

tests/automake.mk:396: warning: tests_ovstest_SOURCES was already defined 
in condition TRUE, which includes condition DPDK_NETDEV ...
Makefile.am:480:   'tests/automake.mk' included from here
tests/automake.mk:353: ... 'tests_ovstest_SOURCES' previously defined here
Makefile.am:480:   'tests/automake.mk' included from here

I think that the following fix is necessary:

diff --git a/tests/automake.mk b/tests/automake.mk
index b3941d0d2109..5fe98bd2124a 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -393,7 +393,7 @@ tests_ovstest_SOURCES = \
tests/test-aa.c \
tests/test-stopwatch.c
 if DPDK_NETDEV
-tests_ovstest_SOURCES = \
+tests_ovstest_SOURCES += \
tests/test-dpdk-mbufs.c
 endif
 
If you agree, Ian, would you mind adding that to the pull request?

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] ofproto-dpif: Fix typo in registered command

2018-07-24 Thread Yifeng Sun
Looks good to me, thanks.

Reviewed-by: Yifeng Sun 

On Mon, Jul 23, 2018 at 7:45 AM, Alin Gabriel Serdean 
wrote:

> Also split line at 79 characters.
>
> Found by inspection.
>
> Signed-off-by: Alin Gabriel Serdean 
> ---
>  ofproto/ofproto-dpif.c | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/ofproto/ofproto-dpif.c b/ofproto/ofproto-dpif.c
> index ad1e8af43..d0d564de1 100644
> --- a/ofproto/ofproto-dpif.c
> +++ b/ofproto/ofproto-dpif.c
> @@ -5813,7 +5813,8 @@ ofproto_unixctl_init(void)
>   NULL);
>  unixctl_command_register("dpif/show-dp-features", "bridge", 1, 1,
>   ofproto_unixctl_dpif_show_dp_features,
> NULL);
> -unixctl_command_register("dpif/dump-flows", "[-m] [--names |
> --no-nmaes] bridge", 1, INT_MAX,
> +unixctl_command_register("dpif/dump-flows",
> + "[-m] [--names | --no-names] bridge", 1,
> INT_MAX,
>   ofproto_unixctl_dpif_dump_flows, NULL);
>  unixctl_command_register("dpif/set-dp-features", "bridge", 1, 3 ,
>   ofproto_unixctl_dpif_set_dp_features, NULL);
> --
> 2.16.1.windows.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-discuss] ovsdb-server core dump and ovsdb corruption using raft cluster

2018-07-24 Thread Yifeng Sun
Hi Yun and Girish,

I submitted a patch, do you mind testing and reviewing it? Thanks.

[PATCH] dynamic-string: Fix a bug that leads to assertion fail

diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
index 6f7b610a9908..4564e420544d 100644
--- a/lib/dynamic-string.c
+++ b/lib/dynamic-string.c
@@ -158,7 +158,7 @@ ds_put_format_valist(struct ds *ds, const char *format,
va_list args_)
 if (needed < available) {
 ds->length += needed;
 } else {
-ds_reserve(ds, ds->length + needed);
+ds_reserve(ds, ds->allocated + needed);

 va_copy(args, args_);
 available = ds->allocated - ds->length + 1;


Thanks,
Yifeng Sun

On Wed, Jul 18, 2018 at 10:48 AM, Girish Moodalbail 
wrote:

> Hello all,
>
> We are able to reproduce this issue on OVS 2.9.2 at will. The OVSDB NB
> server or OVSDB SB server dumps core while it is trying to compact the
> database.
>
> You can reproduce the issue by using:
>
> root@u1804-HVM-domU:/var/crash# ovs-appctl -t
> /var/run/openvswitch/ovnsb_db.ctl ovsdb-server/compact OVN_Southbound
>
> 2018-07-18T17:34:29Z|1|unixctl|WARN|error communicating with
> unix:/var/run/openvswitch/ovnsb_db.ctl: End of file
> ovs-appctl: /var/run/openvswitch/ovnsb_db.ctl: transaction error (End of
> file)
> root@u1804-HVM-domU:/var/crash#
> root@u1804-HVM-domU:/var/crash#
> root@u1804-HVM-domU:/var/crash# ERROR: apport (pid 17393) Wed Jul 18
> 10:34:23 2018: called for pid 14683, signal 6, core limit 0, dump mode 1
> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018: executable:
> /usr/sbin/ovsdb-server (command line "ovsdb-server -vconsole:off
> -vfile:info --log-file=/var/log/openvswitch/ovsdb-server-sb.log
> --remote=punix:/var/run/openvswitch/ovnsb_db.sock
> --pidfile=/var/run/openvswitch/ovnsb_db.pid --unixctl=ovnsb_db.ctl
> --detach
> --monitor --remote=db:OVN_Southbound,SB_Global,connections
> --private-key=db:OVN_Southbound,SSL,private_key
> --certificate=db:OVN_Southbound,SSL,certificate
> --ca-cert=db:OVN_Southbound,SSL,ca_cert
> --ssl-protocols=db:OVN_Southbound,SSL,ssl_protocols
> --ssl-ciphers=db:OVN_Southbound,SSL,ssl_ciphers
> --remote=ptcp:6642:10.0.7.33 /etc/openvswitch/ovnsb_db.db")
> ERROR: apport (pid 17393) Wed Jul 18 10:34:23 2018: is_closing_session():
> no DBUS_SESSION_BUS_ADDRESS in environment
> ERROR: apport (pid 17393) Wed Jul 18 10:34:29 2018: wrote report
> /var/crash/_usr_sbin_ovsdb-server.0.crash
>
> Looking through the crash we see the following stack:
>
> (gdb) bt
> #0  __GI_raise (sig=sig@entry=6) at ../sysdeps/unix/sysv/linux/raise.c:51
> #1  0x7f7c9a43c801 in __GI_abort () at abort.c:79
> #2  0x7f7c9aaa633c in json_serialize (json=,
> s=) at lib/json.c:1554
> #3  0x7f7c9aaa63ab in json_serialize_object_member (i=,
> s=, node=, node=)
> at lib/json.c:1583
> #4  0x7f7c9aaa62f2 in json_serialize_object (s=0x7ffca2173ea0,
> object=0x5568dc5d5b10) at lib/json.c:1612
> #5  json_serialize (json=, s=0x7ffca2173ea0) at
> lib/json.c:1533
> #6  0x7f7c9aaa863c in json_to_ds (json=json@entry=0x5568dc5d4a20,
> flags=flags@entry=0, ds=ds@entry=0x7ffca2173f30) at lib/json.c:1511
> #7  0x7f7c9ae6750f in ovsdb_log_compose_record
> (json=json@entry=0x5568dc5d4a20,
> magic=0x5568dc5d5a60 "CLUSTER",
> header=header@entry=0x7ffca2173f10, data=data@entry=0x7ffca2173f30) at
> ovsdb/log.c:570
> #8  0x7f7c9ae677ef in ovsdb_log_write (file=0x5568dc5d5a80,
> json=0x5568dc5d4a20) at ovsdb/log.c:618
> #9  0x7f7c9ae6796e in ovsdb_log_write_and_free
> (log=log@entry=0x5568dc5d5a80,
> json=0x5568dc5d4a20) at ovsdb/log.c:651
> #10 0x7f7c9ae6d684 in raft_write_snapshot (raft=raft@entry=
> 0x5568dc1e3720,
> log=0x5568dc5d5a80, new_log_start=new_log_start@entry=539578,
> new_snapshot=new_snapshot@entry=0x7ffca21740e0) at ovsdb/raft.c:3588
> #11 0x7f7c9ae6dbf3 in raft_save_snapshot (raft=raft@entry=
> 0x5568dc1e3720,
> new_start=new_start@entry=539578,
> new_snapshot=new_snapshot@entry=0x7ffca21740e0) at ovsdb/raft.c:3647
> #12 0x7f7c9ae757bd in raft_store_snapshot (raft=0x5568dc1e3720,
> new_snapshot_data=new_snapshot_data@entry=0x5568dc5d49a0)
> at ovsdb/raft.c:3849
> #13 0x7f7c9ae7c7ae in ovsdb_storage_store_snapshot__
> (storage=0x5568dc6b2fb0, schema=0x5568dd66f5a0, data=0x5568dca67880)
> at ovsdb/storage.c:541
> #14 0x7f7c9ae7d1de in ovsdb_storage_store_snapshot
> (storage=0x5568dc6b2fb0, schema=schema@entry=0x5568dd66f5a0,
> data=data@entry=0x5568dca67880) at ovsdb/storage.c:568
> #15 0x7f7c9ae69cab in ovsdb_snapshot (db=0x5568dc6b3020) at
> ovsdb/ovsdb.c:519
> #16 0x5568daec1f82 in main_loop (is_backup=0x7ffca21742be,
> exiting=0x7ffca21742bf, run_process=0x0, remotes=0x7ffca2174310,
> unixctl=0x5568dc71ade0, all_dbs=0x7ffca2174350, jsonrpc=0x5568dc1e36a0,
> config=0x7ffca2174370) at ovsdb/ovsdb-server.c:239
> #17 main (argc=, argv=) at
> ovsdb/ovsdb-server.c:457
>
> Walking through the JSON objects being serialized we see that
> "prev_servers" is 

[ovs-dev] [PATCH] dynamic-string: Fix a bug that leads to assertion fail

2018-07-24 Thread Yifeng Sun
'needed' should be the size of the memory space needed beyond what is allocated.

Signed-off-by: Yifeng Sun 
Reported-by: Yun Zhou 
Reported-by: Girish Moodalbail 
---
 lib/dynamic-string.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lib/dynamic-string.c b/lib/dynamic-string.c
index 6f7b610a9908..4564e420544d 100644
--- a/lib/dynamic-string.c
+++ b/lib/dynamic-string.c
@@ -158,7 +158,7 @@ ds_put_format_valist(struct ds *ds, const char *format, 
va_list args_)
 if (needed < available) {
 ds->length += needed;
 } else {
-ds_reserve(ds, ds->length + needed);
+ds_reserve(ds, ds->allocated + needed);
 
 va_copy(args, args_);
 available = ds->allocated - ds->length + 1;
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] ovn: Modify restart_controller in ovn-ctl to use --restart

2018-07-24 Thread Mark Michelson
The --restart flag allows for uninterrupted packet flow when exiting
ovn-controller. This patch modifies the restart_controller argument to
ovn-ctl to use --restart.

Signed-off-by: Mark Michelson 
---
 ovn/utilities/ovn-ctl | 4 ++--
 utilities/ovs-lib.in  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/ovn/utilities/ovn-ctl b/ovn/utilities/ovn-ctl
index 2fce47714..f76248e84 100755
--- a/ovn/utilities/ovn-ctl
+++ b/ovn/utilities/ovn-ctl
@@ -350,7 +350,7 @@ stop_northd () {
 }
 
 stop_controller () {
-OVS_RUNDIR=${OVN_RUNDIR} stop_daemon ovn-controller
+OVS_RUNDIR=${OVN_RUNDIR} stop_daemon ovn-controller $1
 }
 
 stop_controller_vtep () {
@@ -367,7 +367,7 @@ restart_northd () {
 }
 
 restart_controller () {
-stop_controller
+stop_controller --restart
 start_controller
 }
 
diff --git a/utilities/ovs-lib.in b/utilities/ovs-lib.in
index 92f98ad92..9f62dfd25 100644
--- a/utilities/ovs-lib.in
+++ b/utilities/ovs-lib.in
@@ -258,7 +258,7 @@ stop_daemon () {
 case $action in
 EXIT)
 action "Exiting $1 ($pid)" \
-${bindir}/ovs-appctl -T 1 -t $rundir/$1.$pid.ctl 
exit
+${bindir}/ovs-appctl -T 1 -t $rundir/$1.$pid.ctl 
exit $2
 ;;
 TERM)
 action "Killing $1 ($pid)" kill $pid
-- 
2.14.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] ovn: Add '--restart' flag to ovn-controller exit.

2018-07-24 Thread Mark Michelson
When "--restart" is passed to ovn-controller's exit command, then
database entries are not removed for this hypervisor. This means that
* Encaps
* Chassis
* OVS ports
are not removed.

The reasoning is that if the intent is to restart ovn-controller, this
will allow for tunnels to remain up and allow for traffic not to be
interrupted during the restart. When ovn-controller is started again, it
picks back up from where it was.

Signed-off-by: Mark Michelson 
---
 ovn/controller/ovn-controller.c |  92 +++-
 tests/ovn.at| 186 
 2 files changed, 238 insertions(+), 40 deletions(-)

diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-controller.c
index 6ee72a9fa..bd8175af5 100644
--- a/ovn/controller/ovn-controller.c
+++ b/ovn/controller/ovn-controller.c
@@ -541,11 +541,18 @@ ctrl_register_ovs_idl(struct ovsdb_idl *ovs_idl)
 physical_register_ovs_idl(ovs_idl);
 }
 
+struct ovn_controller_exit_args {
+bool *exiting;
+bool *restart;
+};
+
 int
 main(int argc, char *argv[])
 {
 struct unixctl_server *unixctl;
 bool exiting;
+bool restart;
+struct ovn_controller_exit_args exit_args = {, };
 int retval;
 
 ovs_cmdl_proctitle_init(argc, argv);
@@ -560,7 +567,8 @@ main(int argc, char *argv[])
 if (retval) {
 exit(EXIT_FAILURE);
 }
-unixctl_command_register("exit", "", 0, 0, ovn_controller_exit, );
+unixctl_command_register("exit", "", 0, 1, ovn_controller_exit,
+ _args);
 
 /* Initialize group ids for loadbalancing. */
 struct ovn_extend_table group_table;
@@ -631,6 +639,7 @@ main(int argc, char *argv[])
 stopwatch_create(CONTROLLER_LOOP_STOPWATCH_NAME, SW_MS);
 /* Main loop. */
 exiting = false;
+restart = false;
 while (!exiting) {
 /* Check OVN SB database. */
 char *new_ovnsb_remote = get_ovnsb_remote(ovs_idl_loop.idl);
@@ -848,42 +857,45 @@ main(int argc, char *argv[])
 }
 }
 
-/* It's time to exit.  Clean up the databases. */
-bool done = false;
-while (!done) {
-struct ovsdb_idl_txn *ovs_idl_txn = ovsdb_idl_loop_run(_idl_loop);
-struct ovsdb_idl_txn *ovnsb_idl_txn
-= ovsdb_idl_loop_run(_idl_loop);
+/* It's time to exit.  Clean up the databases if we are not restarting */
+if (!restart) {
+bool done = false;
+while (!done) {
+struct ovsdb_idl_txn *ovs_idl_txn
+= ovsdb_idl_loop_run(_idl_loop);
+struct ovsdb_idl_txn *ovnsb_idl_txn
+= ovsdb_idl_loop_run(_idl_loop);
+
+const struct ovsrec_bridge_table *bridge_table
+= ovsrec_bridge_table_get(ovs_idl_loop.idl);
+const struct ovsrec_open_vswitch_table *ovs_table
+= ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
+
+const struct sbrec_port_binding_table *port_binding_table
+= sbrec_port_binding_table_get(ovnsb_idl_loop.idl);
+
+const struct ovsrec_bridge *br_int = get_br_int(ovs_idl_txn,
+bridge_table,
+ovs_table);
+const char *chassis_id = get_chassis_id(ovs_table);
+const struct sbrec_chassis *chassis
+= (chassis_id
+   ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
+   : NULL);
+
+/* Run all of the cleanup functions, even if one of them returns
+ * false. We're done if all of them return true. */
+done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis);
+done = chassis_cleanup(ovnsb_idl_txn, chassis) && done;
+done = encaps_cleanup(ovs_idl_txn, br_int) && done;
+if (done) {
+poll_immediate_wake();
+}
 
-const struct ovsrec_bridge_table *bridge_table
-= ovsrec_bridge_table_get(ovs_idl_loop.idl);
-const struct ovsrec_open_vswitch_table *ovs_table
-= ovsrec_open_vswitch_table_get(ovs_idl_loop.idl);
-
-const struct sbrec_port_binding_table *port_binding_table
-= sbrec_port_binding_table_get(ovnsb_idl_loop.idl);
-
-const struct ovsrec_bridge *br_int = get_br_int(ovs_idl_txn,
-bridge_table,
-ovs_table);
-const char *chassis_id = get_chassis_id(ovs_table);
-const struct sbrec_chassis *chassis
-= (chassis_id
-   ? chassis_lookup_by_name(sbrec_chassis_by_name, chassis_id)
-   : NULL);
-
-/* Run all of the cleanup functions, even if one of them returns false.
- * We're done if all of them return true. */
-done = binding_cleanup(ovnsb_idl_txn, port_binding_table, chassis);

[ovs-dev] [PATCH 0/2] Allow for smoother restarting of ovn-controller

2018-07-24 Thread Mark Michelson
When ovn-controller is restarted, the ovn-controller process is stopped
and then started again. When the process is stopped, the process cleans
itself up by removing all traces of itself from the central southbound
database and the OVS database on the hypervisor. The issue with this is
that this removes tunnels, meaning that traffic from other hypervisors
in the cluster is unable to reach the local hypervisor. When restarting,
it would be better to not clean up, thus allowing for uninterrupted
traffic flow.

This patchset allows for ovn-controller to be stopped without cleaning
itself up. This way, during a restart, traffic can still flow freely
while ovn-controller is down.

Mark Michelson (2):
  ovn: Add '--restart' flag to ovn-controller exit.
  ovn: Modify restart_controller in ovn-ctl to use --restart

 ovn/controller/ovn-controller.c |  92 +++-
 ovn/utilities/ovn-ctl   |   4 +-
 tests/ovn.at| 186 
 utilities/ovs-lib.in|   2 +-
 4 files changed, 241 insertions(+), 43 deletions(-)

-- 
2.14.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-dev, 1 of 4] util: Fix abs_file_name() bugs on Windows.

2018-07-24 Thread 0-day Robot
Bleep bloop.  Greetings Ben Pfaff, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line lacks whitespace around operator
#49 FILE: lib/util.c:1058:
|| (strchr("/\\", fn[0]) && strchr("/\\", fn[1])));

Lines checked: 130, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@bytheb.org

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 00/21] Daemon mode for ovn-nbctl

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 03:22:50PM +0200, Jakub Sitnicki wrote:
> On Mon, 23 Jul 2018 16:47:52 -0700
> Ben Pfaff  wrote:
> 
> > On Thu, Jul 19, 2018 at 03:51:05PM +0200, Jakub Sitnicki wrote:
> > > This series extends ovn-nbctl tool with support for the daemon mode, where
> > > ovn-nbctl acts as a long-lived process that accepts commands over a UNIX 
> > > socket.
> > > The daemon can be started the same way as any other OVS/OVN server:
> > > 
> > >   ovn-nbctl --detach --pidfile --log-file  
> > 
> > Thanks a lot.  I applied this series to master.
> > 
> > Would you mind sending an additional patch to add an appropriate item to
> > NEWS?
> 
> With pleasure.
>  
> > I'm going to play with some ideas for tests.
> 
> Thanks, I don't have it figured out yet.

I posted some thoughts:
https://patchwork.ozlabs.org/project/openvswitch/list/?series=57386

With this series, the difference between testing with the daemon and
without it is just whether you run
export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach)
at the beginning.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 4/4] [RFC] ovn-nbctl: Make daemon mode more transparent.

2018-07-24 Thread Ben Pfaff
This has some flaws in the details; for example, command-line options
parsing is very inflexible.

Signed-off-by: Ben Pfaff 
---
 NEWS  |  6 ++---
 ovn/utilities/ovn-nbctl.8.xml | 61 ---
 ovn/utilities/ovn-nbctl.c | 37 ++
 3 files changed, 91 insertions(+), 13 deletions(-)

diff --git a/NEWS b/NEWS
index 2414b9199ae0..2b4fec48c128 100644
--- a/NEWS
+++ b/NEWS
@@ -31,10 +31,8 @@ Post-v2.9.0
  * ACL match conditions can now match on Port_Groups as well as address
sets that are automatically generated by Port_Groups.  ACLs can be
applied directly to Port_Groups as well.
- * ovn-nbctl can now run as a daemon (long-lived, background process)
-   running commands in response to JSON-RPC requests received over a UNIX
-   socket. Requests to run commands can be sent using ovs-appctl tool, same
-   as for any other OVS/OVN daemon. See ovn-nbctl(8) for details.
+ * ovn-nbctl can now run as a daemon (long-lived, background process).
+   See ovn-nbctl(8) for details.
- DPDK:
  * New 'check-dpdk' Makefile target to run a new system testsuite.
See Testing topic for the details.
diff --git a/ovn/utilities/ovn-nbctl.8.xml b/ovn/utilities/ovn-nbctl.8.xml
index 2cd2fab304cd..3d30d8cd9a33 100644
--- a/ovn/utilities/ovn-nbctl.8.xml
+++ b/ovn/utilities/ovn-nbctl.8.xml
@@ -916,15 +916,62 @@
 Daemon Mode
 
 
-  If ovn-nbctl is invoked with the --detach
-  option (see Daemon Options, below), it runs in the
-  background as a daemon and accepts commands from ovs-appctl
-  (or another JSON-RPC client) indefinitely.  The currently supported
-  commands are described below.
+  When it is invoked in the most ordinary way, ovn-nbctl
+  connects to an OVSDB server that hosts the northbound database, retrieves
+  a partial copy of the database that is complete enough to do its work,
+  sends a transaction request to the server, and receives and processes the
+  server's reply.  In common interactive use, this is fine, but if the
+  database is large, the step in which ovn-nbctl retrieves a
+  partial copy of the database can take a long time, which yields poor
+  performance overall.
 
 
 
+  To improve performance in such a case, ovn-nbctl offers a
+  "daemon mode," in which the user first starts ovn-nbctl
+  running in the background and afterward uses the daemon to execute
+  operations.  Over several ovn-nbctl command invocations,
+  this performs better overall because it retrieves a copy of the database
+  only once at the beginning, not once per program run.
+
+
+
+  Use the --detach option to start an ovn-nbctl
+  daemon.  With this option, ovn-nbctl prints the name of a
+  control socket to stdout.  The client should save this name in
+  environment variable OVN_NB_DAEMON.  Under the Bourne shell
+  this might be done like this:
+
+
+
+  export OVN_NB_DAEMON=$(ovn-nbctl --pidfile --detach)
+
+
+
+  When OVN_NB_DAEMON is set, ovn-nbctl
+  automatically and transparently uses the daemon to execute its commands.
+
+
+
+  When the daemon is no longer needed, kill it and unset the environment
+  variable, e.g.:
+
+
+
+  kill $(cat /var/run/ovn-nbctl.pid)
+  unset OVN_NB_DAEMON
+
+
+
+  Daemon mode is experimental.
+
 
+Daemon Commands
+
+
+  Daemon mode is internally implemented using the same mechanism used by
+  ovs-appctl.  One may also use ovs-appctl
+  directly with the following commands:
 
 
 
@@ -946,10 +993,6 @@
   Causes ovn-nbctl to gracefully terminate.
 
 
-
-  Daemon mode is considered experimental.
-
-
 Options
 
 
diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
index 3c3e582cb906..8b888294e495 100644
--- a/ovn/utilities/ovn-nbctl.c
+++ b/ovn/utilities/ovn-nbctl.c
@@ -24,6 +24,7 @@
 #include "db-ctl-base.h"
 #include "dirs.h"
 #include "fatal-signal.h"
+#include "jsonrpc.h"
 #include "openvswitch/json.h"
 #include "ovn/lib/acl-log.h"
 #include "ovn/lib/ovn-nb-idl.h"
@@ -123,6 +124,40 @@ main(int argc, char *argv[])
 
 nbctl_cmd_init();
 
+char *socket_name = getenv("OVN_NB_DAEMON");
+if (socket_name && socket_name[0]) {
+struct jsonrpc *client;
+int error = unixctl_client_create(socket_name, );
+if (error) {
+ctl_fatal("%s: could not connect to ovn-nb daemon (%s); "
+  "unset OVN_NB_DAEMON to avoid using daemon",
+  socket_name, ovs_strerror(error));
+}
+
+char *cmd_result;
+char *cmd_error;
+error = unixctl_client_transact(client, "run",
+argc - optind, argv + optind,
+_result, _error);
+if (error) 

[ovs-dev] [PATCH 3/4] unixctl: Make path to unixctl_server socket available to the client.

2018-07-24 Thread Ben Pfaff
Signed-off-by: Ben Pfaff 
---
 lib/unixctl.c | 52 
 lib/unixctl.h |  2 ++
 2 files changed, 30 insertions(+), 24 deletions(-)

diff --git a/lib/unixctl.c b/lib/unixctl.c
index bd9c1caeedef..9b3b0671f33c 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -56,6 +56,7 @@ struct unixctl_conn {
 struct unixctl_server {
 struct pstream *listener;
 struct ovs_list conns;
+char *path;
 };
 
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 5);
@@ -216,48 +217,44 @@ unixctl_command_reply_error(struct unixctl_conn *conn, 
const char *error)
 int
 unixctl_server_create(const char *path, struct unixctl_server **serverp)
 {
-struct unixctl_server *server;
-struct pstream *listener;
-char *punix_path;
-int error;
-
 *serverp = NULL;
 if (path && !strcmp(path, "none")) {
 return 0;
 }
 
-if (path) {
-char *abs_path;
-abs_path = abs_file_name(ovs_rundir(), path);
-punix_path = xasprintf("punix:%s", abs_path);
-free(abs_path);
-} else {
-#ifndef _WIN32
-punix_path = xasprintf("punix:%s/%s.%ld.ctl", ovs_rundir(),
-   program_name, (long int) getpid());
+#ifdef _WIN32
+enum { WINDOWS = 1 };
 #else
-punix_path = xasprintf("punix:%s/%s.ctl", ovs_rundir(), program_name);
+enum { WINDOWS = 0 };
 #endif
-}
 
-error = pstream_open(punix_path, , 0);
+long int pid = getpid();
+char *abs_path
+= (path ? abs_file_name(ovs_rundir(), path)
+   : WINDOWS ? xasprintf("%s/%s.ctl", ovs_rundir(), program_name)
+   : xasprintf("%s/%s.%ld.ctl", ovs_rundir(), program_name, pid));
+
+struct pstream *listener;
+char *punix_path = xasprintf("punix:%s", abs_path);
+int error = pstream_open(punix_path, , 0);
+free(punix_path);
+
 if (error) {
-ovs_error(error, "could not initialize control socket %s", punix_path);
-goto exit;
+ovs_error(error, "%s: could not initialize control socket", abs_path);
+free(abs_path);
+return error;
 }
 
 unixctl_command_register("list-commands", "", 0, 0, unixctl_list_commands,
  NULL);
 unixctl_command_register("version", "", 0, 0, unixctl_version, NULL);
 
-server = xmalloc(sizeof *server);
+struct unixctl_server *server = xmalloc(sizeof *server);
 server->listener = listener;
+server->path = abs_path;
 ovs_list_init(>conns);
 *serverp = server;
-
-exit:
-free(punix_path);
-return error;
+return 0;
 }
 
 static void
@@ -429,10 +426,17 @@ unixctl_server_destroy(struct unixctl_server *server)
 kill_connection(conn);
 }
 
+free (server->path);
 pstream_close(server->listener);
 free(server);
 }
 }
+
+const char *
+unixctl_server_get_path(const struct unixctl_server *server)
+{
+return server ? server->path : NULL;
+}
 
 /* On POSIX based systems, connects to a unixctl server socket.  'path' should
  * be the name of a unixctl server socket.  If it does not start with '/', it
diff --git a/lib/unixctl.h b/lib/unixctl.h
index ce43893c6a7d..4562dbc49113 100644
--- a/lib/unixctl.h
+++ b/lib/unixctl.h
@@ -28,6 +28,8 @@ void unixctl_server_run(struct unixctl_server *);
 void unixctl_server_wait(struct unixctl_server *);
 void unixctl_server_destroy(struct unixctl_server *);
 
+const char *unixctl_server_get_path(const struct unixctl_server *);
+
 /* Client for Unix domain socket control connection. */
 struct jsonrpc;
 int unixctl_client_create(const char *path, struct jsonrpc **client);
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/4] unixctl: Use absolute paths on Windows too.

2018-07-24 Thread Ben Pfaff
When this case was adapted for Windows, abs_file_name() simply didn't work
at all there.  Now, it should work OK, and it seems like the right thing
to do, and it makes the code more straightforward too.

CC: Alin Gabriel Serdean 
Signed-off-by: Ben Pfaff 
---
 lib/unixctl.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/lib/unixctl.c b/lib/unixctl.c
index df9c2bdc3e07..bd9c1caeedef 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -228,11 +228,7 @@ unixctl_server_create(const char *path, struct 
unixctl_server **serverp)
 
 if (path) {
 char *abs_path;
-#ifndef _WIN32
 abs_path = abs_file_name(ovs_rundir(), path);
-#else
-abs_path = xstrdup(path);
-#endif
 punix_path = xasprintf("punix:%s", abs_path);
 free(abs_path);
 } else {
@@ -451,16 +447,11 @@ unixctl_server_destroy(struct unixctl_server *server)
 int
 unixctl_client_create(const char *path, struct jsonrpc **client)
 {
-char *abs_path, *unix_path;
 struct stream *stream;
 int error;
 
-#ifdef _WIN32
-abs_path = xstrdup(path);
-#else
-abs_path = abs_file_name(ovs_rundir(), path);
-#endif
-unix_path = xasprintf("unix:%s", abs_path);
+char *abs_path = abs_file_name(ovs_rundir(), path);
+char *unix_path = xasprintf("unix:%s", abs_path);
 
 *client = NULL;
 
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/4] util: Fix abs_file_name() bugs on Windows.

2018-07-24 Thread Ben Pfaff
abs_file_name() believed that a file name that begins with / or contains :
is absolute and that any other file name is relative.  On Windows, this is
wrong in at least the following ways:

   * / and \ are interchangeable on Windows.

   * A name that begins with \\ or // is also absolute.

   * A name that begins with X: but not X:\ is not absolute.

   * A name with : in some position other than the second position is
 not absolute (although it might not be valid either?).

Furthermore, Windows has more than one current working directory (one per
volume letter), so trying to make a file name absolute by just prefixing
the current working directory for the current volume results in silliness.

This patch attempts to fix the problem.

Found by inspection.

CC: Alin Gabriel Serdean 
Signed-off-by: Ben Pfaff 
---
 lib/util.c | 58 +++---
 lib/util.h |  1 +
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/lib/util.c b/lib/util.c
index 7152b55392be..d3c62988bf83 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -1049,37 +1049,57 @@ base_name(const char *file_name)
 }
 #endif /* _WIN32 */
 
-/* If 'file_name' starts with '/', returns a copy of 'file_name'.  Otherwise,
+bool
+is_file_name_absolute(const char *fn)
+{
+#ifdef _WIN32
+/* An absolute path begins with X:\ or \\. */
+return ((fn[0] && fn[1] == ':' && strchr("/\\", fn[2]))
+|| (strchr("/\\", fn[0]) && strchr("/\\", fn[1])));
+#else
+/* An absolute path begins with /. */
+return fn[0] == '/';
+#endif
+}
+
+/* If 'file_name' is absolute, returns a copy of 'file_name'.  Otherwise,
  * returns an absolute path to 'file_name' considering it relative to 'dir',
  * which itself must be absolute.  'dir' may be null or the empty string, in
  * which case the current working directory is used.
  *
- * Additionally on Windows, if 'file_name' has a ':', returns a copy of
- * 'file_name'
- *
  * Returns a null pointer if 'dir' is null and getcwd() fails. */
 char *
 abs_file_name(const char *dir, const char *file_name)
 {
-if (file_name[0] == '/') {
+/* If it's already absolute, return a copy. */
+if (is_file_name_absolute(file_name)) {
 return xstrdup(file_name);
-#ifdef _WIN32
-} else if (strchr(file_name, ':')) {
-return xstrdup(file_name);
-#endif
-} else if (dir && dir[0]) {
+}
+
+/* If a base dir was supplied, use it.  We assume, without checking, that
+ * the base dir is absolute.*/
+if (dir && dir[0]) {
 char *separator = dir[strlen(dir) - 1] == '/' ? "" : "/";
 return xasprintf("%s%s%s", dir, separator, file_name);
-} else {
-char *cwd = get_cwd();
-if (cwd) {
-char *abs_name = xasprintf("%s/%s", cwd, file_name);
-free(cwd);
-return abs_name;
-} else {
-return NULL;
-}
 }
+
+#if _WIN32
+/* It's a little complicated to make an absolute path on Windows because a
+ * relative path might still specify a drive letter.  The OS has a function
+ * to do the job for us, so use it. */
+char abs_path[MAX_PATH];
+DWORD n = GetFullPathName(file_name, sizeof abs_path, abs_path, NULL);
+return n > 0 && n <= sizeof abs_path ? xmemdup0(abs_path, n) : NULL;
+#else
+/* Outside Windows, do the job ourselves. */
+char *cwd = get_cwd();
+if (!cwd) {
+return NULL;
+}
+char *abs_name = xasprintf("%s/%s", cwd, file_name);
+free(cwd);
+return abs_name;
+#endif
 }
 
 /* Like readlink(), but returns the link name as a null-terminated string in
diff --git a/lib/util.h b/lib/util.h
index 0061bb986520..fd414489c7fe 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -231,6 +231,7 @@ char *dir_name(const char *file_name);
 char *base_name(const char *file_name);
 #endif
 char *abs_file_name(const char *dir, const char *file_name);
+bool is_file_name_absolute(const char *);
 
 char *follow_symlinks(const char *filename);
 
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Stokes, Ian
> On Tue, Jul 24, 2018 at 07:55:39PM +0300, Ilya Maximets wrote:
> > Hi.
> > Just wanted to add some comments for the use-cases and testing
> methodology.
> > See inline.
> >
> > And I'm actually not sure if there any profit from this patch set?
> > It looks like an internal mbuf handling rework that only degrades the
> > performance and complicates the code.
> >
> > Please, don't consider me as merge blocker. I just want to understand
> > why you think we need this 1200 LOCs?
> >
> > ---
> > About 'resize()' related discussion:
> > Maybe it's worth to allow dp_packet APIs to return different dp_packet.
> > In this case we'll be able to just clone the packet to malloced memory
> > and resize in cases of not enough headroom available.
> > Like:
> > packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci); or
> > eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);
> >
> > This will have a little performance penalty in compare with data
> > shifting inside the mbuf, but will be much more elegant, and will
> > allow to eliminate all the OVS_NOT_REACHED cases.
> >
> >
> > Best regards, Ilya Maximets.
> 
> Hmm, this is the second person from whom I've heard serious misgivings
> about this patch series.  Tiago, Ian, would you like to respond?  I'm a
> little nervous about merging this patch series, especially relatively late
> before branching, given that some people have technical objections to it.
> 

I think Tiago is currently responding to the queries Ilya has raised but I'd 
like to respond to this being included in the 2.10 release specifically.

The feature has been in development for a while and, up until Tiago took over 
the work 2 months ago, it had received little feedback.

That being said we have had a number of people testing the feature over the 
last month+ who are familiar with OVS DPDK and have since signed off on it as 
it is enabling a common customer requirement for TSO/GRO offloads.

This is just a stepping stone to TSO and GRO enablement for OVS DPDK, which is 
called out as a feature gap between kernel OVS and userspace OVS. The work will 
be continued from our side, aiming for OVS 2.11, so any changes required 
can be refined.

From a validation POV it's not breaking anything existing for OVS DPDK that 
I've come across so the risk on that side is low.

Would it be preferred to hold off merging this until the TSO/GRO aspects have 
been implemented also? I guess my view here was that I would like to enable 
users to begin using the multiseg feature with the 2.10 release and we can 
refine our approach if needed based on the community feedback.

There are some interesting discussions such as changes to the dp_packet API 
above, but it's a pity it comes at a critical time window. I wonder are the API 
changes separate to this work? Impact would be felt outside of the multiseg 
also I would think as they are APIs. If there are changes required to the APIs 
I would envision we would update the mutliseg feature to work with these 
changes as they are introduced in the future.

If you feel this is too much of a risk we can remove it from 2.10. I can respin 
a new pull request with the remaining patches.

Thanks
Ian

> Thanks,
> 
> Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Lam, Tiago
Hi Ilya,

Given the timeline, I'm replying in-line to the critical parts of the
email (the ones that question the feature itself). I'll come back and
reply to the rest later.

On 24/07/2018 17:55, Ilya Maximets wrote:
> Hi.
> Just wanted to add some comments for the use-cases and testing methodology.
> See inline.
> 
> And I'm actually not sure if there any profit from this patch set?
> It looks like an internal mbuf handling rework that only degrades
> the performance and complicates the code.
> 
> Please, don't consider me as merge blocker. I just want to understand
> why you think we need this 1200 LOCs?
> 

Of which 1/2 are related to tests. Granted it is still LOC, but don't
directly impact the dp-packet/netdev-dpdk modules. I believe that
distinction is important.

> ---
> About 'resize()' related discussion:
> Maybe it's worth to allow dp_packet APIs to return different dp_packet.
> In this case we'll be able to just clone the packet to malloced memory
> and resize in cases of not enough headroom available.
> Like:
>   packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> or
>   eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);
> 
> This will have a little performance penalty in compare with data shifting
> inside the mbuf, but will be much more elegant, and will allow to eliminate
> all the OVS_NOT_REACHED cases.
> 
> 
> Best regards, Ilya Maximets.
> 
> On 24.07.2018 17:25, Tiago Lam wrote:
>> Overview
>> 
>> This patchset introduces support for multi-segment mbufs to OvS-DPDK.
>> Multi-segment mbufs are typically used when the size of an mbuf is
>> insufficient to contain the entirety of a packet's data. Instead, the
>> data is split across numerous mbufs, each carrying a portion, or
>> 'segment', of the packet data. mbufs are chained via their 'next'
>> attribute (an mbuf pointer).
>>
>> Use Cases
>> =
>> i.  Handling oversized (guest-originated) frames, which are marked
>> for hardware acceleration/offload (TSO, for example).
> 
> This is not a real use case as vhost doesn't support TSO and other
> offloading that requires handling segmented mbufs, so, guests are
> not allowed to send packets larger than MTU. netdev-linux interfaces
> also not allowed to receive packets larger than MTU.
> 
> TSO and other offloading support will require much more work, and,
> in fact, the profit from it is controversial.
> 

There might be some context missing here. As you mentioned, TSO is not
possible at the moment with OvS-DPDK. However, we are working on
bringing TSO / GRO to OvS-DPDK and this serves as preparatory work to
enable those features. These are features some customers are requesting
for, and closes a feature gap between OvS and OvS-DPDK.

>>
>> Packets which originate from a non-DPDK source may be marked for
>> offload; as such, they may be larger than the permitted ingress
>> interface's MTU, and may be stored in an oversized dp-packet. In
>> order to transmit such packets over a DPDK port, their contents
>> must be copied to a DPDK mbuf (via dpdk_do_tx_copy). However, in
>> its current implementation, that function only copies data into
>> a single mbuf; if the space available in the mbuf is exhausted,
>> but not all packet data has been copied, then it is lost.
>> Similarly, when cloning a DPDK mbuf, it must be considered
>> whether that mbuf contains multiple segments. Both issues are
>> resolved within this patchset.
>>
>> ii. Handling jumbo frames.
> 
> Different internal representation of big packets. Why we need this?
> 

I believe the above answer should cover this as well.

>>
>> While OvS already supports jumbo frames, it does so by increasing
>> mbuf size, such that the entirety of a jumbo frame may be handled
>> in a single mbuf. This is certainly the preferred, and most
>> performant approach (and remains the default).
>>
>> Enabling multi-segment mbufs
>> 
>> Multi-segment and single-segment mbufs are mutually exclusive, and the
>> user must decide on which approach to adopt on init. The introduction
>> of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this.
>>
>> This is a global boolean value, which determines how jumbo frames are
>> represented across all DPDK ports. In the absence of a user-supplied
>> value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
>> mbufs must be explicitly enabled / single-segment mbufs remain the
>> default.
>>
>> Setting the field is identical to setting existing DPDK-specific OVSDB
>> fields:
>>
>> ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
>> ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
>> ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
>> ==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
>>
>> Performance notes (based on v8)
>> =
>> In order to test for regressions in performance, tests were run on top

Re: [ovs-dev] [ovs-dev, 1 of 2] util: Fix abs_file_name() bugs on Windows.

2018-07-24 Thread 0-day Robot
Bleep bloop.  Greetings Ben Pfaff, I am a robot and I have tried out your patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line lacks whitespace around operator
#49 FILE: lib/util.c:1058:
|| (strchr("/\\", fn[0]) && strchr("/\\", fn[1])));

Lines checked: 130, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@bytheb.org

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 07:55:39PM +0300, Ilya Maximets wrote:
> Hi.
> Just wanted to add some comments for the use-cases and testing methodology.
> See inline.
> 
> And I'm actually not sure if there any profit from this patch set?
> It looks like an internal mbuf handling rework that only degrades
> the performance and complicates the code.
> 
> Please, don't consider me as merge blocker. I just want to understand
> why you think we need this 1200 LOCs?
> 
> ---
> About 'resize()' related discussion:
> Maybe it's worth to allow dp_packet APIs to return different dp_packet.
> In this case we'll be able to just clone the packet to malloced memory
> and resize in cases of not enough headroom available.
> Like:
>   packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
> or
>   eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);
> 
> This will have a little performance penalty in compare with data shifting
> inside the mbuf, but will be much more elegant, and will allow to eliminate
> all the OVS_NOT_REACHED cases.
> 
> 
> Best regards, Ilya Maximets.

Hmm, this is the second person from whom I've heard serious misgivings
about this patch series.  Tiago, Ian, would you like to respond?  I'm a
little nervous about merging this patch series, especially relatively
late before branching, given that some people have technical objections
to it.

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/2] unixctl: Use absolute paths on Windows too.

2018-07-24 Thread Ben Pfaff
When this case was adapted for Windows, abs_file_name() simply didn't work
at all there.  Now, it should work OK, and it seems like the right thing
to do, and it makes the code more straightforward too.

CC: Alin Gabriel Serdean 
Signed-off-by: Ben Pfaff 
---
 lib/unixctl.c | 13 ++---
 1 file changed, 2 insertions(+), 11 deletions(-)

diff --git a/lib/unixctl.c b/lib/unixctl.c
index df9c2bdc3e07..bd9c1caeedef 100644
--- a/lib/unixctl.c
+++ b/lib/unixctl.c
@@ -228,11 +228,7 @@ unixctl_server_create(const char *path, struct 
unixctl_server **serverp)
 
 if (path) {
 char *abs_path;
-#ifndef _WIN32
 abs_path = abs_file_name(ovs_rundir(), path);
-#else
-abs_path = xstrdup(path);
-#endif
 punix_path = xasprintf("punix:%s", abs_path);
 free(abs_path);
 } else {
@@ -451,16 +447,11 @@ unixctl_server_destroy(struct unixctl_server *server)
 int
 unixctl_client_create(const char *path, struct jsonrpc **client)
 {
-char *abs_path, *unix_path;
 struct stream *stream;
 int error;
 
-#ifdef _WIN32
-abs_path = xstrdup(path);
-#else
-abs_path = abs_file_name(ovs_rundir(), path);
-#endif
-unix_path = xasprintf("unix:%s", abs_path);
+char *abs_path = abs_file_name(ovs_rundir(), path);
+char *unix_path = xasprintf("unix:%s", abs_path);
 
 *client = NULL;
 
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/2] util: Fix abs_file_name() bugs on Windows.

2018-07-24 Thread Ben Pfaff
abs_file_name() believed that a file name that begins with / or contains :
is absolute and that any other file name is relative.  On Windows, this is
wrong in at least the following ways:

   * / and \ are interchangeable on Windows.

   * A name that begins with \\ or // is also absolute.

   * A name that begins with X: but not X:\ is not absolute.

   * A name with : in some position other than the second position is
 not absolute (although it might not be valid either?).

Furthermore, Windows has more than one current working directory (one per
volume letter), so trying to make a file name absolute by just prefixing
the current working directory for the current volume results in silliness.

This patch attempts to fix the problem.

Found by inspection.

CC: Alin Gabriel Serdean 
Signed-off-by: Ben Pfaff 
---
 lib/util.c | 58 +++---
 lib/util.h |  1 +
 2 files changed, 40 insertions(+), 19 deletions(-)

diff --git a/lib/util.c b/lib/util.c
index 7152b55392be..d3c62988bf83 100644
--- a/lib/util.c
+++ b/lib/util.c
@@ -1049,37 +1049,57 @@ base_name(const char *file_name)
 }
 #endif /* _WIN32 */
 
-/* If 'file_name' starts with '/', returns a copy of 'file_name'.  Otherwise,
+bool
+is_file_name_absolute(const char *fn)
+{
+#ifdef _WIN32
+/* An absolute path begins with X:\ or \\. */
+return ((fn[0] && fn[1] == ':' && strchr("/\\", fn[2]))
+|| (strchr("/\\", fn[0]) && strchr("/\\", fn[1])));
+#else
+/* An absolute path begins with /. */
+return fn[0] == '/';
+#endif
+}
+
+/* If 'file_name' is absolute, returns a copy of 'file_name'.  Otherwise,
  * returns an absolute path to 'file_name' considering it relative to 'dir',
  * which itself must be absolute.  'dir' may be null or the empty string, in
  * which case the current working directory is used.
  *
- * Additionally on Windows, if 'file_name' has a ':', returns a copy of
- * 'file_name'
- *
  * Returns a null pointer if 'dir' is null and getcwd() fails. */
 char *
 abs_file_name(const char *dir, const char *file_name)
 {
-if (file_name[0] == '/') {
+/* If it's already absolute, return a copy. */
+if (is_file_name_absolute(file_name)) {
 return xstrdup(file_name);
-#ifdef _WIN32
-} else if (strchr(file_name, ':')) {
-return xstrdup(file_name);
-#endif
-} else if (dir && dir[0]) {
+}
+
+/* If a base dir was supplied, use it.  We assume, without checking, that
+ * the base dir is absolute.*/
+if (dir && dir[0]) {
 char *separator = dir[strlen(dir) - 1] == '/' ? "" : "/";
 return xasprintf("%s%s%s", dir, separator, file_name);
-} else {
-char *cwd = get_cwd();
-if (cwd) {
-char *abs_name = xasprintf("%s/%s", cwd, file_name);
-free(cwd);
-return abs_name;
-} else {
-return NULL;
-}
 }
+
+#if _WIN32
+/* It's a little complicated to make an absolute path on Windows because a
+ * relative path might still specify a drive letter.  The OS has a function
+ * to do the job for us, so use it. */
+char abs_path[MAX_PATH];
+DWORD n = GetFullPathName(file_name, sizeof abs_path, abs_path, NULL);
+return n > 0 && n <= sizeof abs_path ? xmemdup0(abs_path, n) : NULL;
+#else
+/* Outside Windows, do the job ourselves. */
+char *cwd = get_cwd();
+if (!cwd) {
+return NULL;
+}
+char *abs_name = xasprintf("%s/%s", cwd, file_name);
+free(cwd);
+return abs_name;
+#endif
 }
 
 /* Like readlink(), but returns the link name as a null-terminated string in
diff --git a/lib/util.h b/lib/util.h
index 0061bb986520..fd414489c7fe 100644
--- a/lib/util.h
+++ b/lib/util.h
@@ -231,6 +231,7 @@ char *dir_name(const char *file_name);
 char *base_name(const char *file_name);
 #endif
 char *abs_file_name(const char *dir, const char *file_name);
+bool is_file_name_absolute(const char *);
 
 char *follow_symlinks(const char *filename);
 
-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/2] abs_file_name() fixes for Windows.

2018-07-24 Thread Ben Pfaff
I found these nits while looking at the unixctl code for other reasons.  I
have not tested them on Windows.

Ben Pfaff (2):
  util: Fix abs_file_name() bugs on Windows.
  unixctl: Use absolute paths on Windows too.

 lib/unixctl.c | 13 ++---
 lib/util.c| 58 +++---
 lib/util.h|  1 +
 3 files changed, 42 insertions(+), 30 deletions(-)

-- 
2.16.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
> On Tue, Jul 24, 2018 at 04:30:08PM +, Stokes, Ian wrote:
> > > On Tue, Jul 24, 2018 at 11:25:26AM +, Stokes, Ian wrote:
> > > > > On 20/07/2018 16:56, Ian Stokes wrote:
> > > > > > Hi Ben,
> > > > > >
> > > > > > The following changes since commit
> > > > > > 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> > > > > >
> > > > > >build: Add gitattribute file to build-aux (2018-07-19
> > > > > > 21:02:33
> > > > > > +0300)
> > > > > >
> > > > > > are available in the git repository at:
> > > > > >
> > > > > >https://github.com/istokes/ovs dpdk_merge
> > > > > >
> > > > > > for you to fetch changes up to
> > > 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> > > > > >
> > > > > >sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> > > > > >
> > > > >
> > > > > Hi Ian,
> > > > >
> > > > > After some discussions with Darrell around patch 8/14 of the
> > > > > > "Support multi-segment mbufs" patchset, I'll be sending a v6 of
> > > > > that series with that patch removed.
> > > > >
> > > > > Could that be included in this PR (meaning the previously
> > > > > included
> > > > > v5 of the series would be dropped entirely), since it still
> > > > > hasn't been merged to master?
> > > > >
> > > > > Thanks, and sorry for the trouble here.
> > > >
> > > > No problem, it makes more sense for a new pull request, otherwise
> > > > there
> > > would be a revert for patch 8. If you send out a v6 with the
> > > required patch removed and any other changes (which are small I
> > > would think) I can review and apply to a new pull request.
> > >
> > > Ian, would you mind just appending the changes to your current PR so
> > > that I can pull the whole thing in one go?
> >
> > Sure, I've sent a new pull request that has all the changes ready and
> rebased to head of master just now.
> >
> > https://mail.openvswitch.org/pipermail/ovs-dev/2018-July/350005.html
> 
> Oh, and can you confirm that you'd like this to be merged into master
> before I branch for 2.10 (which will probably happen today)?
> 

Yes, this is targeted for 2.10 as well; if you can merge to master before
you branch, that would be great.

Ian
> Thanks,
> 
> Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 04:30:08PM +, Stokes, Ian wrote:
> > On Tue, Jul 24, 2018 at 11:25:26AM +, Stokes, Ian wrote:
> > > > On 20/07/2018 16:56, Ian Stokes wrote:
> > > > > Hi Ben,
> > > > >
> > > > > The following changes since commit
> > > > > 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> > > > >
> > > > >build: Add gitattribute file to build-aux (2018-07-19 21:02:33
> > > > > +0300)
> > > > >
> > > > > are available in the git repository at:
> > > > >
> > > > >https://github.com/istokes/ovs dpdk_merge
> > > > >
> > > > > for you to fetch changes up to
> > 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> > > > >
> > > > >sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> > > > >
> > > >
> > > > Hi Ian,
> > > >
> > > > After some discussions with Darrell around patch 8/14 of the
> > > > "Support multi-segment mbufs" patchset, I'll be sending a v6 of
> > > > that series with that patch removed.
> > > >
> > > > Could that be included in this PR (meaning the previously included
> > > > v5 of the series would be dropped entirely), since it still hasn't
> > > > been merged to master?
> > > >
> > > > Thanks, and sorry for the trouble here.
> > >
> > > No problem, it makes more sense for a new pull request, otherwise there
> > would be a revert for patch 8. If you send out a v6 with the required
> > patch removed and any other changes (which are small I would think) I can
> > review and apply to a new pull request.
> > 
> > Ian, would you mind just appending the changes to your current PR so that
> > I can pull the whole thing in one go?
> 
> Sure, I've sent a new pull request that has all the changes ready and 
> rebased to head of master just now.
> 
> https://mail.openvswitch.org/pipermail/ovs-dev/2018-July/350005.html

Oh, and can you confirm that you'd like this to be merged into master
before I branch for 2.10 (which will probably happen today)?

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Ilya Maximets
Hi.
Just wanted to add some comments for the use-cases and testing methodology.
See inline.

And I'm actually not sure if there is any profit from this patch set?
It looks like an internal mbuf handling rework that only degrades
the performance and complicates the code.

Please don't consider me a merge blocker. I just want to understand
why you think we need these 1200 LOCs.

---
About 'resize()' related discussion:
Maybe it's worth allowing the dp_packet APIs to return a different dp_packet.
In this case we'll be able to just clone the packet to malloced memory
and resize it when there is not enough headroom available.
Like:
packet = eth_push_vlan(packet, vlan->vlan_tpid, vlan->vlan_tci);
or
eth_push_vlan(&packet, vlan->vlan_tpid, vlan->vlan_tci);

This will have a small performance penalty compared with data shifting
inside the mbuf, but will be much more elegant, and will allow us to eliminate
all the OVS_NOT_REACHED cases.


Best regards, Ilya Maximets.

On 24.07.2018 17:25, Tiago Lam wrote:
> Overview
> 
> This patchset introduces support for multi-segment mbufs to OvS-DPDK.
> Multi-segment mbufs are typically used when the size of an mbuf is
> insufficient to contain the entirety of a packet's data. Instead, the
> data is split across numerous mbufs, each carrying a portion, or
> 'segment', of the packet data. mbufs are chained via their 'next'
> attribute (an mbuf pointer).
> 
> Use Cases
> =
> i.  Handling oversized (guest-originated) frames, which are marked
> for hardware acceleration/offload (TSO, for example).

This is not a real use case, as vhost doesn't support TSO and other
offloading that requires handling segmented mbufs, so guests are
not allowed to send packets larger than the MTU. netdev-linux interfaces
are also not allowed to receive packets larger than the MTU.

TSO and other offloading support will require much more work, and,
in fact, the profit from it is controversial.

> 
> Packets which originate from a non-DPDK source may be marked for
> offload; as such, they may be larger than the permitted ingress
> interface's MTU, and may be stored in an oversized dp-packet. In
> order to transmit such packets over a DPDK port, their contents
> must be copied to a DPDK mbuf (via dpdk_do_tx_copy). However, in
> its current implementation, that function only copies data into
> a single mbuf; if the space available in the mbuf is exhausted,
> but not all packet data has been copied, then it is lost.
> Similarly, when cloning a DPDK mbuf, it must be considered
> whether that mbuf contains multiple segments. Both issues are
> resolved within this patchset.
> 
> ii. Handling jumbo frames.

Different internal representation of big packets. Why do we need this?

> 
> While OvS already supports jumbo frames, it does so by increasing
> mbuf size, such that the entirety of a jumbo frame may be handled
> in a single mbuf. This is certainly the preferred, and most
> performant approach (and remains the default).
> 
> Enabling multi-segment mbufs
> 
> Multi-segment and single-segment mbufs are mutually exclusive, and the
> user must decide on which approach to adopt on init. The introduction
> of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this.
> 
> This is a global boolean value, which determines how jumbo frames are
> represented across all DPDK ports. In the absence of a user-supplied
> value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
> mbufs must be explicitly enabled / single-segment mbufs remain the
> default.
> 
> Setting the field is identical to setting existing DPDK-specific OVSDB
> fields:
> 
> ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
> ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
> ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
> ==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
> 
> Performance notes (based on v8)
> =
> In order to test for regressions in performance, tests were run on top
> of master 88125d6 and v8 of this patchset, both with the multi-segment
> mbufs option enabled and disabled.
> 
> VSperf was used to run the phy2phy_cont and pvp_cont tests with varying
> packet sizes of 64B, 1500B and 7000B, on a 10Gbps interface.
> 

I'm sorry, but these performance tests and the analysis below are mostly useless.
It makes no sense to test 1.5K or 7K byte packets with a 10G interface.
OVS is capable of handling this traffic without any drops in almost any
configuration. My PVP test with bonded phy easily takes almost 2 times
higher traffic rates using 2x10G interfaces in balanced bonding with disabled
caches. The table below just reports that you're reaching the limit of the
10G physical NICs.

Just wanted to point that out. Next time, please, use better test scenarios.

I expect that enabling multi-segment mbufs will reduce performance in all
scenarios by a few percent.

> 

[ovs-dev] [PATCH] ovs-tcpundump: Fix incompatibilities with python3

2018-07-24 Thread Timothy Redaelli
Added parentheses after print and used "as" instead of "," in except.

This commit also fixes a couple of flake8 warnings:

utilities/ovs-tcpundump:23:1: E302 expected 2 blank lines, found 1
utilities/ovs-tcpundump:35:1: E305 expected 2 blank lines after class or
function definition, found 1

Tested on Python 2.7.15 and Python 3.6.5

Signed-off-by: Timothy Redaelli 
---
 utilities/ovs-tcpundump.in | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/utilities/ovs-tcpundump.in b/utilities/ovs-tcpundump.in
index c99015b5b..c29870062 100755
--- a/utilities/ovs-tcpundump.in
+++ b/utilities/ovs-tcpundump.in
@@ -20,8 +20,9 @@ import sys
 
 argv0 = sys.argv[0]
 
+
 def usage():
-print """\
+print("""\
 %(argv0)s: print "tcpdump -xx" output as hex
 usage: %(argv0)s < FILE
 where FILE is output from "tcpdump -xx".
@@ -29,14 +30,15 @@ where FILE is output from "tcpdump -xx".
 The following options are also available:
   -h, --help  display this help message
   -V, --version   display version information\
-""" % {'argv0': argv0}
+""" % {'argv0': argv0})
 sys.exit(0)
 
+
 if __name__ == "__main__":
 try:
 options, args = getopt.gnu_getopt(sys.argv[1:], 'hV',
   ['help', 'version'])
-except getopt.GetoptError, geo:
+except getopt.GetoptError as geo:
 sys.stderr.write("%s: %s\n" % (argv0, geo.msg))
 sys.exit(1)
 
@@ -44,7 +46,7 @@ if __name__ == "__main__":
 if key in ['-h', '--help']:
 usage()
 elif key in ['-V', '--version']:
-print "ovs-tcpundump (Open vSwitch) @VERSION@"
+print("ovs-tcpundump (Open vSwitch) @VERSION@")
 else:
 sys.exit(0)
 
@@ -63,12 +65,12 @@ if __name__ == "__main__":
 m = regex.match(line)
 if m is None or int(m.group(1), 16) == 0:
 if packet != '':
-print packet
+print(packet)
 packet = ''
 if m:
 packet += re.sub(r'\s', '', m.group(2), 0)
 if packet != '':
-print packet
+print(packet)
 
 # Local variables:
 # mode: python
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] ovs-tcpdump: Fix incompatibilities with python3

2018-07-24 Thread Timothy Redaelli
Opening a file with 'rw' in Python3 returns an error, moreover using 'rw' in
Python2 is wrong too since it opens the file using O_RDONLY and not by using
O_RDWR.

This commit fixes it by using the low-level os.open function with O_RDWR
as suggested by the Linux kernel (tuntap.txt) documentation.

This commit also fixes some common bytes vs. string incompatibilities.

Tested on Python 2.7.15 and Python 3.6.5

Signed-off-by: Timothy Redaelli 
---
 utilities/ovs-tcpdump.in | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/utilities/ovs-tcpdump.in b/utilities/ovs-tcpdump.in
index 91fa14e5a..17b5d48f1 100755
--- a/utilities/ovs-tcpdump.in
+++ b/utilities/ovs-tcpdump.in
@@ -62,8 +62,8 @@ def _install_tap_linux(tap_name, mtu_value=None):
 TUNSETIFF = 0x400454CA  # This is derived by printf() of TUNSETIFF
 TUNSETOWNER = TUNSETIFF + 2
 
-tapdev_fd = open('/dev/net/tun', 'rw')
-ifr = struct.pack('16sH', tap_name, IFF_TAP | IFF_NO_PI)
+tapdev_fd = os.open('/dev/net/tun', os.O_RDWR)
+ifr = struct.pack('16sH', tap_name.encode('utf8'), IFF_TAP | IFF_NO_PI)
 fcntl.ioctl(tapdev_fd, TUNSETIFF, ifr)
 fcntl.ioctl(tapdev_fd, TUNSETOWNER, os.getegid())
 
@@ -457,10 +457,10 @@ def main():
 pipes = _doexec(*([dump_cmd, '-i', mirror_interface] + tcpdargs))
 try:
 while pipes.poll() is None:
-data = pipes.stdout.readline().strip('\n')
+data = pipes.stdout.readline().strip(b'\n')
 if len(data) == 0:
 raise KeyboardInterrupt
-print(data)
+print(data.decode('utf-8'))
 raise KeyboardInterrupt
 except KeyboardInterrupt:
 if pipes.poll() is None:
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
> On Tue, Jul 24, 2018 at 11:25:26AM +, Stokes, Ian wrote:
> > > On 20/07/2018 16:56, Ian Stokes wrote:
> > > > Hi Ben,
> > > >
> > > > The following changes since commit
> > > > 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> > > >
> > > >build: Add gitattribute file to build-aux (2018-07-19 21:02:33
> > > > +0300)
> > > >
> > > > are available in the git repository at:
> > > >
> > > >https://github.com/istokes/ovs dpdk_merge
> > > >
> > > > for you to fetch changes up to
> 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> > > >
> > > >sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> > > >
> > >
> > > Hi Ian,
> > >
> > > After some discussions with Darrell around patch 8/14 of the
> > > "Support multi-segment mbufs" patchset, I'll be sending a v6 of
> > > that series with that patch removed.
> > >
> > > Could that be included in this PR (meaning the previously included
> > > v5 of the series would be dropped entirely), since it still hasn't
> > > been merged to master?
> > >
> > > Thanks, and sorry for the trouble here.
> >
> > No problem, it makes more sense for a new pull request, otherwise there
> would be a revert for patch 8. If you send out a v6 with the required
> patch removed and any other changes (which are small I would think) I can
> review and apply to a new pull request.
> 
> Ian, would you mind just appending the changes to your current PR so that
> I can pull the whole thing in one go?

Sure, I've sent a new pull request that has all the changes ready and rebased 
to head of master just now.

https://mail.openvswitch.org/pipermail/ovs-dev/2018-July/350005.html

Thanks
Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
Hi Ben,

The following changes since commit 1ac690899592f97520aa1c959a623175e642f0a4:
  

  NEWS: Mention daemon mode support for ovn-nbctl. (2018-07-24 09:14:28 -0700)

are available in the git repository at:

  https://github.com/istokes/ovs dpdk_merge

for you to fetch changes up to 557338de0d5074efeecf306ab8ef1da1c3adcaa3:

  sparse: Add support for DPDK. (2018-07-24 17:01:03 +0100)


Ben Pfaff (4):  
  netdev-dpdk: Fix incorrect byte order conversion in log message.
  netdev-dpdk: Fix sparse complaints. 
  netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it. 
  sparse: Add support for DPDK.   

Ian Stokes (1):
  Docs: Improve OVS DPDK version mapping notice.

Mark Kavanagh (4):
  netdev-dpdk: fix mbuf sizing
  dp-packet: Init specific mbuf fields.
  netdev-dpdk: copy large packet to multi-seg. mbufs
  netdev-dpdk: support multi-segment jumbo frames   

Michael Qiu (1):
  dp-packet: copy data from multi-seg. DPDK mbuf

Tiago Lam (8):
  dp-packet: Fix allocated size on DPDK init.
  netdev-dpdk: Serialise non-pmds mbufs' alloc/free.
  dp-packet: Fix data_len handling multi-seg mbufs.
  dp-packet: Handle multi-seg mbufs in helper funcs.
  dp-packet: Handle multi-seg mubfs in shift() func.
  dpdk-tests: Add uni-tests for multi-seg mbufs.
  dpdk-tests: Accept other configs in OVS_DPDK_START
  dpdk-tests: End-to-end tests for multi-seg mbufs.

Yipeng Wang (1):
  dpif-netdev: Add SMC cache after EMC cache

 Documentation/howto/dpdk.rst   |6 +-
 Documentation/intro/install/dpdk.rst   |6 +-
 Documentation/topics/dpdk/bridge.rst   |   15 ++
 Documentation/topics/dpdk/jumbo-frames.rst |   52 +++
 Documentation/topics/dpdk/memory.rst   |   36 +
 Makefile.am|2 +-
 NEWS   |3 +
 build-aux/initial-tab-whitelist|1 +
 include/sparse/automake.mk |9 ++
 include/sparse/rte_byteorder.h |  281 
+++
 include/sparse/rte_esp.h   |   65 +
 include/sparse/rte_flow.h  | 1483 
+
 include/sparse/rte_icmp.h  |  106 ++
 include/sparse/rte_ip.h|  490 
+
 include/sparse/rte_sctp.h  |  103 +
 include/sparse/rte_tcp.h   |  108 ++
 include/sparse/rte_udp.h   |  103 +
 include/sparse/xmmintrin.h |   24 +++
 lib/cmap.c |   74 ++
 lib/cmap.h |   11 ++
 lib/dp-packet.c|  173 --
 lib/dp-packet.h|  214 ---
 lib/dpdk.c |8 +
 lib/dpif-netdev-perf.h |1 +
 lib/dpif-netdev.c  |  329 
-
 lib/netdev-dpdk.c  |  270 
+++---
 lib/netdev-dpdk.h  |2 +
 tests/automake.mk  |   10 +-
 tests/dpdk-packet-mbufs.at |7 +
 tests/pmd.at   |7 +-
 tests/system-dpdk-macros.at|6 +-
 tests/system-dpdk-testsuite.at |1 +
 tests/system-dpdk.at   |   65 +
 tests/test-dpdk-mbufs.c|  513 

 vswitchd/vswitch.xml   |   35 +
 35 files changed, 4481 insertions(+), 138 deletions(-)
 create mode 100644 include/sparse/rte_byteorder.h
 create mode 100644 include/sparse/rte_esp.h
 create mode 100644 include/sparse/rte_flow.h
 create mode 100644 include/sparse/rte_icmp.h
 create mode 100644 include/sparse/rte_ip.h
 create mode 100644 include/sparse/rte_sctp.h
 create mode 100644 include/sparse/rte_tcp.h
 create mode 100644 include/sparse/rte_udp.h
 create mode 100644 include/sparse/xmmintrin.h
 create mode 100644 tests/dpdk-packet-mbufs.at
 create mode 100644 tests/test-dpdk-mbufs.c

Thanks
Ian

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 00/11] Get rid of ctl_fatal() calls in ovn-nbctl (part 2)

2018-07-24 Thread Ben Pfaff
On Tue, Jul 17, 2018 at 03:34:04PM +0200, Jakub Sitnicki wrote:
> This is a continuation of an earlier series that aims to replace calls to
> ctl_fatal() in command handlers in ovn-nbctl. The motivation is to handle 
> errors
> gracefully when running commands in daemon mode because as a long-lived 
> process
> we shouldn't terminate on errors that we can recover from.
> 
> After this series there are no more ctl_fatal() calls in ovn-nbctl that affect
> the daemon mode. The only remaining function left to convert is the commands
> parser in db-ctl-base module (ctl_parse_commands()), which I intend to deal 
> with
> separately. Either as a part of ovn-nbctl daemon series (already in review 
> [1]),
> or as a follow-up to it.
> 
> Thanks,
> Jakub
> 
> [1] https://patchwork.ozlabs.org/project/openvswitch/list/?series=55472

I applied this series to master.  Thank you!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 11:25:26AM +, Stokes, Ian wrote:
> > On 20/07/2018 16:56, Ian Stokes wrote:
> > > Hi Ben,
> > >
> > > The following changes since commit
> > > 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> > >
> > >build: Add gitattribute file to build-aux (2018-07-19 21:02:33
> > > +0300)
> > >
> > > are available in the git repository at:
> > >
> > >https://github.com/istokes/ovs dpdk_merge
> > >
> > > for you to fetch changes up to 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> > >
> > >sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> > >
> > 
> > Hi Ian,
> > 
> > After some discussions with Darrell around patch 8/14 of the "Support
> > multi-segment mbufs" patchset, I'll be sending a v6 of that series with
> > that patch removed.
> > 
> > Could that be included in this PR (meaning the previously included v5 of
> > the series would be dropped entirely), since it still hasn't been merged
> > to master?
> > 
> > Thanks, and sorry for the trouble here.
> 
> No problem, it makes more sense for a new pull request, otherwise there would 
> be a revert for patch 8. If you send out a v6 with the required patch removed 
> and any other changes (which are small I would think) I can review and apply 
> to a new pull request.

Ian, would you mind just appending the changes to your current PR so
that I can pull the whole thing in one go?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 10/11] ovn-nbctl: Remove pointless "return; " at ends of functions.

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 03:20:25PM +0200, Jakub Sitnicki wrote:
> On Mon, 23 Jul 2018 15:38:55 -0700
> Ben Pfaff  wrote:
> 
> > On Tue, Jul 17, 2018 at 03:34:14PM +0200, Jakub Sitnicki wrote:
> > > Fix fall-out from applying a semantic patch that converts ctl_fatal()
> > > calls to use ctl_error().
> > > 
> > > Signed-off-by: Jakub Sitnicki   
> > 
> > Oh, you fixed that up later ;-).  Ha, I'll revert it to your way
> > instead, never mind.
> 
> Yes, I've decided to do fix-ups separately so that the
> Coccinelle-generated patches could be reviewed based on the semantic
> patch. Not sure if that worked out fine or just created confusion.

I thought that these two were OK, but I'd probably insist on squashing
if one of the fix-ups could cause bisection problems.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] NEWS: Mention daemon mode support for ovn-nbctl.

2018-07-24 Thread Ben Pfaff
On Tue, Jul 24, 2018 at 04:21:58PM +0200, Jakub Sitnicki wrote:
> Signed-off-by: Jakub Sitnicki 
> ---
>  NEWS | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/NEWS b/NEWS
> index 057e8bfd3..2414b9199 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -31,6 +31,10 @@ Post-v2.9.0
>   * ACL match conditions can now match on Port_Groups as well as address
> sets that are automatically generated by Port_Groups.  ACLs can be
> applied directly to Port_Groups as well.
> + * ovn-nbctl can now run as a daemon (long-lived, background process)
> +   running commands in response to JSON-RPC requests received over a UNIX
> +   socket. Requests to run commands can be sent using ovs-appctl tool, 
> same
> +   as for any other OVS/OVN daemon. See ovn-nbctl(8) for details.
> - DPDK:
>   * New 'check-dpdk' Makefile target to run a new system testsuite.
> See Testing topic for the details.

Thanks, applied to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Revert "dp-packet: Handle multi-seg mbufs in resize__()."

2018-07-24 Thread Lam, Tiago
Please discard this patch as it becomes obsolete with v6 of the "Support
multi-segment mbufs" series.

On 20/07/2018 18:11, Tiago Lam wrote:
> This reverts commit bc4b614. The commit tries to alleviate the call to
> OVS_NOT_REACHED() in dp_packet_resize__(), for DPDK packets, by trying
> to reuse the available tailroom space when no more headroom space is
> available, and vice-versa. A simpler approach is to mitigate the call to
> dp_packet_resize__() first, when DPDK packets are in use. Later, if
> needed, this approach can be revisited.
> 
> Additionally, it also fixes the tests that were relying on the removed
> functionality.
> 
> CC: Darrell Ball 
> Signed-off-by: Tiago Lam 
> ---
> Note that the above commit, bc4b614, is in dpdk_merge still, and not yet 
> merged to master.
> ---
>  lib/dp-packet.c | 48 ++--
>  tests/test-dpdk-mbufs.c | 27 +++
>  2 files changed, 13 insertions(+), 62 deletions(-)
> 
> diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> index ae060e2..6773535 100644
> --- a/lib/dp-packet.c
> +++ b/lib/dp-packet.c
> @@ -285,51 +285,9 @@ dp_packet_resize__(struct dp_packet *b, size_t 
> new_headroom, size_t new_tailroom
>  new_allocated = new_headroom + dp_packet_size(b) + new_tailroom;
>  
>  switch (b->source) {
> -/* When resizing mbufs, both a single mbuf and multi-segment mbufs (where
> - * data is not contigously held in memory), both the headroom and the
> - * tailroom available will be used to make more space for where data 
> needs
> - * to be inserted. I.e if there's not enough headroom, data may be 
> shifted
> - * right if there's enough tailroom.
> - * However, this is not bulletproof and in some cases the space available
> - * won't be enough - in those cases, an error should be returned and the
> - * packet dropped. */
>  case DPBUF_DPDK:
> -{
> -size_t miss_len;
> -
> -if (new_headroom == dp_packet_headroom(b)) {
> -/* This is a tailroom adjustment. Since there's no tailroom space
> - * left, try and shift data towards the head to free up tail 
> space,
> - * if there's enough headroom */
> -
> -miss_len = new_tailroom - dp_packet_tailroom(b);
> -
> -if (miss_len <= new_headroom) {
> -dp_packet_shift(b, -miss_len);
> -} else {
> -/* XXX: Handle error case and report error to caller */
> -OVS_NOT_REACHED();
> -}
> -} else {
> -/* Otherwise, this is a headroom adjustment. Try to shift data
> - * towards the tail to free up head space, if there's enough
> - * tailroom */
> -
> -miss_len = new_headroom - dp_packet_headroom(b);
> -
> -
> -if (miss_len <= new_tailroom) {
> -dp_packet_shift(b, miss_len);
> -} else {
> -/* XXX: Handle error case and report error to caller */
> -OVS_NOT_REACHED();
> -}
> -}
> -
> -new_base = dp_packet_base(b);
> +OVS_NOT_REACHED();
>  
> -break;
> -}
>  case DPBUF_MALLOC:
>  if (new_headroom == dp_packet_headroom(b)) {
>  new_base = xrealloc(dp_packet_base(b), new_allocated);
> @@ -353,9 +311,7 @@ dp_packet_resize__(struct dp_packet *b, size_t 
> new_headroom, size_t new_tailroom
>  OVS_NOT_REACHED();
>  }
>  
> -if (b->source != DPBUF_DPDK) {
> -dp_packet_set_allocated(b, new_allocated);
> -}
> +dp_packet_set_allocated(b, new_allocated);
>  dp_packet_set_base(b, new_base);
>  
>  new_data = (char *) new_base + new_headroom;
> diff --git a/tests/test-dpdk-mbufs.c b/tests/test-dpdk-mbufs.c
> index 1c77038..8168cae 100644
> --- a/tests/test-dpdk-mbufs.c
> +++ b/tests/test-dpdk-mbufs.c
> @@ -215,15 +215,6 @@ dpdk_pkt_put(struct dp_packet *pkt, void *p, size_t 
> size) {
>  
>  dp_packet_mbuf_write(fmbuf, 0, size, p);
>  
> -/* Adjust size of intermediate mbufs from current tail to end */
> -/*size_t pkt_len = size;
> -while (fmbuf && pkt_len > 0) {
> -fmbuf->data_len = MIN(pkt_len, fmbuf->buf_len - fmbuf->data_off);
> -pkt_len -= fmbuf->data_len;
> -
> -fmbuf = fmbuf->next;
> -}*/
> -
>  dp_packet_set_size(pkt, size);
>  
>  return pkt;
> @@ -256,17 +247,21 @@ test_dpdk_packet_insert_tailroom_and_headroom(void) {
>  struct dp_packet *pkt = dpdk_mp_alloc_pkt(mp);
>  ovs_assert(pkt != NULL);
>  
> +/* Reserve 256B of header */
> +size_t head_len = 256;
> +dp_packet_reserve(pkt, head_len);
> +
>  /* Put the first 512B of "test_str" in the packet's header */
>  size_t str_len = 512;
>  char *p = dp_packet_put(pkt, test_str, str_len);
>  ovs_assert(p != NULL);
> -/* Allocate extra 256B of header */
> -size_t head_len = 256;
> +
> +/* Fill the reserved 256B 

[ovs-dev] [PATCH v6 13/13] dpdk-tests: End-to-end tests for multi-seg mbufs.

2018-07-24 Thread Tiago Lam
The following tests are added to the DPDK testsuite to add some
coverage for the multi-segment mbufs:
- Check that multi-segment mbufs are disabled by default;
- Check that providing `other_config:dpdk-multi-seg-mbufs=true` indeed
  enables mbufs;
- Using a DPDK port, send a random packet out and check that `ofctl
  dump-flows` shows the correct amount of packets and bytes sent.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/system-dpdk.at | 65 
 1 file changed, 65 insertions(+)

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 3d21b01..af8de8c 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -71,3 +71,68 @@ OVS_VSWITCHD_STOP("/does not exist. The Open vSwitch kernel 
module is probably n
 ")
 AT_CLEANUP
 dnl --
+
+AT_SETUP([Jumbo frames - Multi-segment disabled by default])
+OVS_DPDK_START()
+
+AT_CHECK([grep "multi-segment mbufs enabled" ovs-vswitchd.log], [1], [])
+OVS_VSWITCHD_STOP("/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
+
+AT_SETUP([Jumbo frames - Multi-segment enabled])
+OVS_DPDK_START([dpdk-multi-seg-mbufs=true])
+AT_CHECK([grep "multi-segment mbufs enabled" ovs-vswitchd.log], [], [stdout])
+OVS_VSWITCHD_STOP("/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
+
+AT_SETUP([Jumbo frames - Multi-segment mbufs Tx])
+OVS_DPDK_PRE_CHECK()
+OVS_DPDK_START([per-port-memory=true dpdk-multi-seg-mbufs=true])
+
+dnl Add userspace bridge and attach it to OVS
+AT_CHECK([ovs-vsctl add-br br10 -- set bridge br10 datapath_type=netdev])
+AT_CHECK([ovs-vsctl add-port br10 dpdk0 \
+-- set Interface dpdk0 type=dpdk options:dpdk-devargs=$(cat PCI_ADDR) \
+-- set Interface dpdk0 mtu_request=9000], [], [stdout], [stderr])
+
+AT_CHECK([ovs-vsctl show], [], [stdout])
+
+dnl Add flows to send packets out from the 'dpdk0' port
+AT_CHECK([
+ovs-ofctl del-flows br10
+ovs-ofctl add-flow br10 in_port=LOCAL,actions=output:dpdk0
+], [], [stdout])
+
+AT_CHECK([ovs-ofctl dump-flows br10], [], [stdout])
+
+dnl Send packet out of the 'dpdk0' port
+AT_CHECK([
+ARP_HEADER="09000B0009000A000806000108000604000100010A\
+0100020A02"
+dnl Build a random hex string to append to the ARP_HEADER
+RANDOM_BODY=$(printf '0102030405%.0s' {1..1750})
+dnl 8792B ARP packet
+RANDOM_ARP="$ARP_HEADER$RANDOM_BODY"
+
+ovs-ofctl packet-out br10 "packet=$RANDOM_ARP,action=resubmit:LOCAL"
+], [], [stdout])
+
+AT_CHECK([ovs-ofctl dump-flows br10], [0], [stdout])
+
+dnl Confirm the single packet has been sent with correct size
+AT_CHECK([ovs-ofctl dump-flows br10 | ofctl_strip | grep in_port], [0], [dnl
+ n_packets=1, n_bytes=8792, in_port=LOCAL actions=output:1
+])
+
+dnl Clean up
+OVS_VSWITCHD_STOP("/does not exist. The Open vSwitch kernel module is probably 
not loaded./d
+/Failed to enable flow control/d
+/failed to connect to \/tmp\/dpdkvhostclient0: No such file or directory/d
+/Global register is changed during/d
+/EAL: No free hugepages reported in hugepages-1048576kB/d
+")
+AT_CLEANUP
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 12/13] dpdk-tests: Accept other configs in OVS_DPDK_START

2018-07-24 Thread Tiago Lam
As it stands, OVS_DPDK_START() won't allow other configs to be set
before starting the ovs-vswitchd daemon. This is a problem since some
configs, such as the "dpdk-multi-seg-mbufs=true" for enabling the
multi-segment mbufs, need to be set prior to starting OvS.

To support other options, OVS_DPDK_START() has been modified to accept
extra configs in the form "$config_name=$config_value". It then uses
ovs-vsctl to set the configs.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/system-dpdk-macros.at | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/system-dpdk-macros.at b/tests/system-dpdk-macros.at
index 0762ee0..7c65834 100644
--- a/tests/system-dpdk-macros.at
+++ b/tests/system-dpdk-macros.at
@@ -21,7 +21,7 @@ m4_define([OVS_DPDK_PRE_CHECK],
 ])
 
 
-# OVS_DPDK_START()
+# OVS_DPDK_START([other-conf-args])
 #
 # Create an empty database and start ovsdb-server. Add special configuration
 # dpdk-init to enable DPDK functionality. Start ovs-vswitchd connected to that
@@ -48,6 +48,10 @@ m4_define([OVS_DPDK_START],
AT_CHECK([lscpu], [], [stdout])
AT_CHECK([cat stdout | grep "NUMA node(s)" | awk '{c=1; while (c++<$(3)) 
{printf "1024,"}; print "1024"}' > SOCKET_MEM])
AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . 
other_config:dpdk-socket-mem="$(cat SOCKET_MEM)"])
+   dnl Iterate through $other-conf-args list and include them
+   m4_foreach_w(opt, $1, [
+   AT_CHECK([ovs-vsctl --no-wait set Open_vSwitch . other_config:opt])
+   ])
 
dnl Start ovs-vswitchd.
AT_CHECK([ovs-vswitchd --detach --no-chdir --pidfile --log-file -vvconn 
-vofproto_dpif -vunixctl], [0], [stdout], [stderr])
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 11/13] dpdk-tests: Add unit tests for multi-seg mbufs.

2018-07-24 Thread Tiago Lam
In order to create a minimal environment that allows the tests to get
mbufs from an existing mempool, the following approach is taken:
- EAL is initialised (by using the main dpdk_init()) and a (very) small
  mempool is instantiated (mimicking the logic in dpdk_mp_create()).
  This mempool instance is global and used by all the tests;
- Packets are then allocated from the instantiated mempool, and tested
  on, by running some operations on them and manipulating data.

The tests introduced focus on testing DPDK dp_packets (where
source=DPBUF_DPDK), linked with a single or multiple mbufs, across
several operations, such as:
- dp_packet_put();
- dp_packet_shift();
- dp_packet_reserve();
- dp_packet_push_uninit();
- dp_packet_clear();
- And as a consequence of some of these, dp_packet_put_uninit() and
  dp_packet_resize__().

Finally, this has also been integrated with the new DPDK testsuite.
Thus, when running `$sudo make check-dpdk` one will also be running
these tests.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 tests/automake.mk  |  10 +-
 tests/dpdk-packet-mbufs.at |   7 +
 tests/system-dpdk-testsuite.at |   1 +
 tests/test-dpdk-mbufs.c| 513 +
 4 files changed, 530 insertions(+), 1 deletion(-)
 create mode 100644 tests/dpdk-packet-mbufs.at
 create mode 100644 tests/test-dpdk-mbufs.c

diff --git a/tests/automake.mk b/tests/automake.mk
index 8224e5a..b3941d0 100644
--- a/tests/automake.mk
+++ b/tests/automake.mk
@@ -134,7 +134,8 @@ SYSTEM_DPDK_TESTSUITE_AT = \
tests/system-common-macros.at \
tests/system-dpdk-macros.at \
tests/system-dpdk-testsuite.at \
-   tests/system-dpdk.at
+   tests/system-dpdk.at \
+   tests/dpdk-packet-mbufs.at
 
 check_SCRIPTS += tests/atlocal
 
@@ -391,6 +392,10 @@ tests_ovstest_SOURCES = \
tests/test-vconn.c \
tests/test-aa.c \
tests/test-stopwatch.c
+if DPDK_NETDEV
+tests_ovstest_SOURCES = \
+   tests/test-dpdk-mbufs.c
+endif
 
 if !WIN32
 tests_ovstest_SOURCES += \
@@ -403,6 +408,9 @@ tests_ovstest_SOURCES += \
 endif
 
 tests_ovstest_LDADD = lib/libopenvswitch.la ovn/lib/libovn.la
+if DPDK_NETDEV
+tests_ovstest_LDFLAGS = $(AM_LDFLAGS) $(DPDK_vswitchd_LDFLAGS)
+endif
 
 noinst_PROGRAMS += tests/test-strtok_r
 tests_test_strtok_r_SOURCES = tests/test-strtok_r.c
diff --git a/tests/dpdk-packet-mbufs.at b/tests/dpdk-packet-mbufs.at
new file mode 100644
index 000..f28e4fc
--- /dev/null
+++ b/tests/dpdk-packet-mbufs.at
@@ -0,0 +1,7 @@
+AT_BANNER([OVS-DPDK dp_packet unit tests])
+
+AT_SETUP([OVS-DPDK dp_packet - mbufs allocation])
+AT_KEYWORDS([dp_packet, multi-seg, mbufs])
+AT_CHECK(ovstest test-dpdk-packet, [], [ignore], [ignore])
+
+AT_CLEANUP
diff --git a/tests/system-dpdk-testsuite.at b/tests/system-dpdk-testsuite.at
index 382f09e..f5edf58 100644
--- a/tests/system-dpdk-testsuite.at
+++ b/tests/system-dpdk-testsuite.at
@@ -23,3 +23,4 @@ m4_include([tests/system-common-macros.at])
 m4_include([tests/system-dpdk-macros.at])
 
 m4_include([tests/system-dpdk.at])
+m4_include([tests/dpdk-packet-mbufs.at])
diff --git a/tests/test-dpdk-mbufs.c b/tests/test-dpdk-mbufs.c
new file mode 100644
index 000..8168cae
--- /dev/null
+++ b/tests/test-dpdk-mbufs.c
@@ -0,0 +1,513 @@
+/*
+ * Copyright (c) 2018 Intel Corporation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#include "dp-packet.h"
+#include "ovstest.h"
+#include "dpdk.h"
+#include "smap.h"
+
+#define N_MBUFS 1024
+#define MBUF_DATA_LEN 2048
+
+int num_tests = 0;
+
+/* Global var to hold a mempool instance, "test-mp", used in all of the tests
+ * below. This instance is instantiated in dpdk_setup_eal_with_mp(). */
+static struct rte_mempool *mp;
+
+/* Test data used to fill the packets with data. Note that this isn't a string
+ * that represents a valid packet, by any means. The pattern is generated in set_
+ * testing_pattern_str() and the sole purpose is to verify the data remains the
+ * same after inserting and operating on multi-segment mbufs. */
+static char *test_str;
+
+/* Asserts a dp_packet that holds a single mbuf, where:
+ * - nb_segs must be 1;
+ * - pkt_len must be equal to data_len which in turn must equal the provided
+ *   'pkt_len';
+ * - data_off must start at the provided 'data_ofs';
+ * - next must be NULL. */
+static void
+assert_single_mbuf(struct dp_packet 

[ovs-dev] [PATCH v6 08/13] dp-packet: copy data from multi-seg. DPDK mbuf

2018-07-24 Thread Tiago Lam
From: Michael Qiu 

When cloning a packet whose source is the DPDK driver, multi-segment
mbufs must be taken into account, and each segment's data copied one by
one.

Also, much of a DPDK mbuf's information (packet type, ol_flags, etc.) is
lost during a copy.  That information is very important for DPDK to
process packets.

Co-authored-by: Mark Kavanagh 
Co-authored-by: Tiago Lam 

Signed-off-by: Michael Qiu 
Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c   | 71 ++-
 lib/dp-packet.h   |  3 +++
 lib/netdev-dpdk.c |  1 +
 3 files changed, 64 insertions(+), 11 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index d6e19eb..6773535 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -48,6 +48,23 @@ dp_packet_use__(struct dp_packet *b, void *base, size_t 
allocated,
 dp_packet_set_size(b, 0);
 }
 
+#ifdef DPDK_NETDEV
+void
+dp_packet_copy_mbuf_flags(struct dp_packet *dst, const struct dp_packet *src)
+{
+ovs_assert(dst != NULL && src != NULL);
+struct rte_mbuf *buf_dst = &(dst->mbuf);
+struct rte_mbuf buf_src = src->mbuf;
+
+buf_dst->nb_segs = buf_src.nb_segs;
+buf_dst->ol_flags = buf_src.ol_flags;
+buf_dst->packet_type = buf_src.packet_type;
+buf_dst->tx_offload = buf_src.tx_offload;
+}
+#else
+#define dp_packet_copy_mbuf_flags(arg1, arg2)
+#endif
+
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
  * memory starting at 'base'.  'base' should be the first byte of a region
  * obtained from malloc().  It will be freed (with free()) if 'b' is resized or
@@ -158,6 +175,44 @@ dp_packet_clone(const struct dp_packet *buffer)
 return dp_packet_clone_with_headroom(buffer, 0);
 }
 
+#ifdef DPDK_NETDEV
+struct dp_packet *
+dp_packet_clone_with_headroom(const struct dp_packet *b, size_t headroom) {
+struct dp_packet *new_buffer;
+uint32_t pkt_len = dp_packet_size(b);
+
+/* copy multi-seg data */
+if (b->source == DPBUF_DPDK && b->mbuf.nb_segs > 1) {
+void *dst = NULL;
+struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, >mbuf);
+
+new_buffer = dp_packet_new_with_headroom(pkt_len, headroom);
+dst = dp_packet_data(new_buffer);
+dp_packet_set_size(new_buffer, pkt_len);
+
+if (!rte_pktmbuf_read(mbuf, 0, pkt_len, dst)) {
+return NULL;
+}
+} else {
+new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(b),
+dp_packet_size(b),
+headroom);
+}
+
+/* Copy the following fields into the returned buffer: l2_pad_size,
+ * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
+memcpy(_buffer->l2_pad_size, >l2_pad_size,
+   sizeof(struct dp_packet) -
+   offsetof(struct dp_packet, l2_pad_size));
+
+dp_packet_copy_mbuf_flags(new_buffer, b);
+if (dp_packet_rss_valid(new_buffer)) {
+new_buffer->mbuf.hash.rss = b->mbuf.hash.rss;
+}
+
+return new_buffer;
+}
+#else
 /* Creates and returns a new dp_packet whose data are copied from 'buffer'.
  * The returned dp_packet will additionally have 'headroom' bytes of
  * headroom. */
@@ -165,32 +220,25 @@ struct dp_packet *
 dp_packet_clone_with_headroom(const struct dp_packet *buffer, size_t headroom)
 {
 struct dp_packet *new_buffer;
+uint32_t pkt_len = dp_packet_size(buffer);
 
 new_buffer = dp_packet_clone_data_with_headroom(dp_packet_data(buffer),
- dp_packet_size(buffer),
- headroom);
+ pkt_len, headroom);
+
 /* Copy the following fields into the returned buffer: l2_pad_size,
  * l2_5_ofs, l3_ofs, l4_ofs, cutlen, packet_type and md. */
 memcpy(_buffer->l2_pad_size, >l2_pad_size,
 sizeof(struct dp_packet) -
 offsetof(struct dp_packet, l2_pad_size));
 
-#ifdef DPDK_NETDEV
-new_buffer->mbuf.ol_flags = buffer->mbuf.ol_flags;
-#else
 new_buffer->rss_hash_valid = buffer->rss_hash_valid;
-#endif
-
 if (dp_packet_rss_valid(new_buffer)) {
-#ifdef DPDK_NETDEV
-new_buffer->mbuf.hash.rss = buffer->mbuf.hash.rss;
-#else
 new_buffer->rss_hash = buffer->rss_hash;
-#endif
 }
 
 return new_buffer;
 }
+#endif
 
 /* Creates and returns a new dp_packet that initially contains a copy of the
  * 'size' bytes of data starting at 'data' with no headroom or tailroom. */
@@ -330,6 +378,7 @@ dp_packet_mbuf_write(struct rte_mbuf *mbuf, int16_t ofs, 
uint32_t len,
 len -= len_copy;
 ofs = 0;
 
+mbuf->data_len = len_copy;
 mbuf = mbuf->next;
 }
 }
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 6ca4e98..d087cb3 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -124,6 +124,9 @@ void 

[ovs-dev] [PATCH v6 10/13] netdev-dpdk: support multi-segment jumbo frames

2018-07-24 Thread Tiago Lam
From: Mark Kavanagh 

Currently, jumbo frame support for OvS-DPDK is implemented by
increasing the size of mbufs within a mempool, such that each mbuf
within the pool is large enough to contain an entire jumbo frame of
a user-defined size. Typically, for each user-defined MTU,
'requested_mtu', a new mempool is created, containing mbufs of size
~requested_mtu.

With the multi-segment approach, a port uses a single mempool,
(containing standard/default-sized mbufs of ~2k bytes), irrespective
of the user-requested MTU value. To accommodate jumbo frames, mbufs
are chained together, where each mbuf in the chain stores a portion of
the jumbo frame. Each mbuf in the chain is termed a segment, hence the
name.

== Enabling multi-segment mbufs ==
Multi-segment and single-segment mbufs are mutually exclusive, and the
user must decide on which approach to adopt on init. The introduction
of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this. This
is a global boolean value, which determines how jumbo frames are
represented across all DPDK ports. In the absence of a user-supplied
value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
mbufs must be explicitly enabled / single-segment mbufs remain the
default.

Setting the field is identical to setting existing DPDK-specific OVSDB
fields:

ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true

Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 Documentation/topics/dpdk/jumbo-frames.rst | 52 +++
 Documentation/topics/dpdk/memory.rst   | 36 
 NEWS   |  1 +
 lib/dpdk.c |  8 
 lib/netdev-dpdk.c  | 66 ++
 lib/netdev-dpdk.h  |  2 +
 vswitchd/vswitch.xml   | 22 ++
 7 files changed, 179 insertions(+), 8 deletions(-)

diff --git a/Documentation/topics/dpdk/jumbo-frames.rst 
b/Documentation/topics/dpdk/jumbo-frames.rst
index 00360b4..89ce8de 100644
--- a/Documentation/topics/dpdk/jumbo-frames.rst
+++ b/Documentation/topics/dpdk/jumbo-frames.rst
@@ -71,3 +71,55 @@ Jumbo frame support has been validated against 9728B frames, 
which is the
 largest frame size supported by Fortville NIC using the DPDK i40e driver, but
 larger frames and other DPDK NIC drivers may be supported. These cases are
 common for use cases involving East-West traffic only.
+
+-------------------
+Multi-segment mbufs
+-------------------
+
+Instead of increasing the size of mbufs within a mempool, such that each mbuf
+within the pool is large enough to contain an entire jumbo frame of a
+user-defined size, mbufs can be chained together instead. In this approach each
+mbuf in the chain stores a portion of the jumbo frame, by default ~2K bytes,
+irrespective of the user-requested MTU value. Since each mbuf in the chain is
+termed a segment, this approach is named "multi-segment mbufs".
+
+This approach may bring more flexibility in use cases where the maximum packet
+length may be hard to guess. For example, in cases where packets originate from
+sources marked for offload (such as TSO), each packet may be larger than the
+MTU, and as such, when forwarding it to a DPDK port a single mbuf may not be
+enough to hold all of the packet's data.
+
+Multi-segment and single-segment mbufs are mutually exclusive, and the user
+must decide on which approach to adopt on initialisation. If multi-segment
+mbufs is to be enabled, it can be done so with the following command::
+
+$ ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true
+
+Single-segment mbufs still remain the default when using OvS-DPDK, and the
+above option `dpdk-multi-seg-mbufs` must be explicitly set to `true` if
+multi-segment mbufs are to be used.
+
+~~~~~~~~~~~~~~~~~
+Performance notes
+~~~~~~~~~~~~~~~~~
+
+When using multi-segment mbufs some PMDs may not support vectorized Tx
+functions, due to the non-contiguous nature of the data. As a result this can
+hit performance for smaller packet sizes. For example, on a setup sending 64B
+packets at line rate, a decrease of ~20% has been observed. The performance
+impact stops being noticeable for larger packet sizes, although the exact size
+will vary between PMDs, and depending on the architecture one's using.
+
+Tests performed with the i40e PMD driver only showed this limitation for 64B
+packets, and the same rate was observed when comparing multi-segment mbufs and
+single-segment mbuf for 128B packets. In other words, the 20% drop in
+performance was not observed for packets >= 128B during this test case.
+
+Because of this, multi-segment mbufs is not advised to be used with smaller
+packet sizes, 

[ovs-dev] [PATCH v6 09/13] netdev-dpdk: copy large packet to multi-seg. mbufs

2018-07-24 Thread Tiago Lam
From: Mark Kavanagh 

Currently, packets are only copied to a single segment in the function
dpdk_do_tx_copy(). This could be an issue in the case of jumbo frames,
particularly when multi-segment mbufs are involved.

This patch calculates the number of segments needed by a packet and
copies the data to each segment.

A new function, dpdk_buf_alloc(), has also been introduced as a wrapper
around the nonpmd_mp_mutex to serialise allocations from a non-pmd
context.

Co-authored-by: Michael Qiu 
Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Michael Qiu 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 84 +--
 1 file changed, 75 insertions(+), 9 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 50de1a2..f05ad36 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -552,6 +552,27 @@ dpdk_rte_mzalloc(size_t sz)
 return rte_zmalloc(OVS_VPORT_DPDK, sz, OVS_CACHE_LINE_SIZE);
 }
 
+static struct rte_mbuf *
+dpdk_buf_alloc(struct rte_mempool *mp)
+{
+struct rte_mbuf *mbuf = NULL;
+
+/* If non-pmd we need to lock on nonpmd_mp_mutex mutex */
+if (!dpdk_thread_is_pmd()) {
+ovs_mutex_lock(_mp_mutex);
+
+mbuf = rte_pktmbuf_alloc(mp);
+
+ovs_mutex_unlock(_mp_mutex);
+
+return mbuf;
+}
+
+mbuf = rte_pktmbuf_alloc(mp);
+
+return mbuf;
+}
+
 void
 free_dpdk_buf(struct dp_packet *packet)
 {
@@ -2320,6 +2341,49 @@ out:
 }
 }
 
+static int
+dpdk_copy_dp_packet_to_mbuf(struct dp_packet *packet, struct rte_mbuf **head,
+struct rte_mempool *mp)
+{
+struct rte_mbuf *mbuf, *fmbuf;
+uint32_t size = dp_packet_size(packet);
+uint16_t max_data_len;
+uint32_t nb_segs = 0;
+
+/* Allocate first mbuf to know the size of data available */
+fmbuf = mbuf = *head = dpdk_buf_alloc(mp);
+if (OVS_UNLIKELY(!mbuf)) {
+return ENOMEM;
+}
+
+/* All new allocated mbuf's max data len is the same */
+max_data_len = mbuf->buf_len - mbuf->data_off;
+
+/* Calculate # of output mbufs. */
+nb_segs = size / max_data_len;
+if (size % max_data_len) {
+nb_segs = nb_segs + 1;
+}
+
+/* Allocate additional mbufs, less the one already allocated above */
+for (int i = 1; i < nb_segs; i++) {
+mbuf->next = dpdk_buf_alloc(mp);
+if (!mbuf->next) {
+free_dpdk_buf(CONTAINER_OF(fmbuf, struct dp_packet, mbuf));
+fmbuf = NULL;
+return ENOMEM;
+}
+mbuf = mbuf->next;
+}
+
+fmbuf->nb_segs = nb_segs;
+fmbuf->pkt_len = size;
+
+dp_packet_mbuf_write(fmbuf, 0, size, dp_packet_data(packet));
+
+return 0;
+}
+
 /* Tx function. Transmit packets indefinitely */
 static void
 dpdk_do_tx_copy(struct netdev *netdev, int qid, struct dp_packet_batch *batch)
@@ -2336,6 +2400,7 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 struct rte_mbuf *pkts[PKT_ARRAY_SIZE];
 uint32_t cnt = batch_cnt;
 uint32_t dropped = 0;
+uint32_t i;
 
 if (dev->type != DPDK_DEV_VHOST) {
 /* Check if QoS has been configured for this netdev. */
@@ -2346,28 +2411,29 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid, struct 
dp_packet_batch *batch)
 
 uint32_t txcnt = 0;
 
-for (uint32_t i = 0; i < cnt; i++) {
+for (i = 0; i < cnt; i++) {
 struct dp_packet *packet = batch->packets[i];
 uint32_t size = dp_packet_size(packet);
+int err = 0;
 
 if (OVS_UNLIKELY(size > dev->max_packet_len)) {
 VLOG_WARN_RL(, "Too big size %u max_packet_len %d",
  size, dev->max_packet_len);
-
 dropped++;
 continue;
 }
 
-pkts[txcnt] = rte_pktmbuf_alloc(dev->dpdk_mp->mp);
-if (OVS_UNLIKELY(!pkts[txcnt])) {
+err = dpdk_copy_dp_packet_to_mbuf(packet, [txcnt],
+  dev->dpdk_mp->mp);
+if (err != 0) {
+if (err == ENOMEM) {
+VLOG_ERR_RL(, "Failed to alloc mbufs! %u packets dropped",
+cnt - i);
+}
+
 dropped += cnt - i;
 break;
 }
-
-/* We have to do a copy for now */
-memcpy(rte_pktmbuf_mtod(pkts[txcnt], void *),
-   dp_packet_data(packet), size);
-dp_packet_set_size((struct dp_packet *)pkts[txcnt], size);
 dp_packet_copy_mbuf_flags((struct dp_packet *)pkts[txcnt], packet);
 
 txcnt++;
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 07/13] dp-packet: Handle multi-seg mbufs in shift() func.

2018-07-24 Thread Tiago Lam
In its current implementation dp_packet_shift() is also unaware of
multi-seg mbufs (that hold data in memory non-contiguously) and assumes
that data exists contiguously in memory, memmove'ing data to perform the
shift.

To add support for multi-seg mbufs, a new set of functions was
introduced, dp_packet_mbuf_shift() and dp_packet_mbuf_write(). These
functions are used by dp_packet_shift(), when handling multi-seg mbufs,
to shift and write data within a chain of mbufs.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c | 97 +
 lib/dp-packet.h | 10 ++
 2 files changed, 107 insertions(+)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 2aaeaae..d6e19eb 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -294,6 +294,97 @@ dp_packet_prealloc_headroom(struct dp_packet *b, size_t 
size)
 }
 }
 
+#ifdef DPDK_NETDEV
+/* Write len data bytes in a mbuf at specified offset.
+ *
+ * 'mbuf', pointer to the destination mbuf where 'ofs' is, and the mbuf where
+ * the data will first be written.
+ * 'ofs', the offset within the provided 'mbuf' where 'data' is to be written.
+ * 'len', the size of the to be written 'data'.
+ * 'data', pointer to the to be written bytes.
+ *
+ * XXX: This function is the counterpart of the `rte_pktmbuf_read()` function
+ * available with DPDK, in the rte_mbuf.h */
+void
+dp_packet_mbuf_write(struct rte_mbuf *mbuf, int16_t ofs, uint32_t len,
+ const void *data)
+{
+char *dst_addr;
+uint16_t data_len;
+int len_copy;
+while (mbuf) {
+if (len == 0) {
+break;
+}
+
+dst_addr = rte_pktmbuf_mtod_offset(mbuf, char *, ofs);
+data_len = MBUF_BUF_END(mbuf->buf_addr, mbuf->buf_len) - dst_addr;
+
+len_copy = MIN(len, data_len);
+/* We don't know if 'data' is the result of a rte_pktmbuf_read() call,
+ * in which case we may end up writing to the same region of memory we
+ * are reading from and overlapping. Hence the use of memmove() here */
+memmove(dst_addr, data, len_copy);
+
+data = ((char *) data) + len_copy;
+len -= len_copy;
+ofs = 0;
+
+mbuf = mbuf->next;
+}
+}
+
+static void
+dp_packet_mbuf_shift_(struct rte_mbuf *dbuf, int16_t dst_ofs,
+  const struct rte_mbuf *sbuf, uint16_t src_ofs, int len)
+{
+char rd[len];
+const char *wd = rte_pktmbuf_read(sbuf, src_ofs, len, rd);
+
+ovs_assert(wd);
+
+dp_packet_mbuf_write(dbuf, dst_ofs, len, wd);
+}
+
+/* Similarly to dp_packet_shift(), shifts the data within the mbufs of a
+ * dp_packet of DPBUF_DPDK source by 'delta' bytes.
+ * Caller must make sure of the following conditions:
+ * - When shifting left, delta can't be bigger than the data_len available in
+ *   the last mbuf;
+ * - When shifting right, delta can't be bigger than the space available in the
+ *   first mbuf (buf_len - data_off).
+ * Both these conditions guarantee that a shift operation doesn't fall outside
+ * the bounds of the existing mbufs, so that the first and last mbufs (when
+ * using multi-segment mbufs), remain the same. */
+static void
+dp_packet_mbuf_shift(struct dp_packet *b, int delta)
+{
+uint16_t src_ofs;
+int16_t dst_ofs;
+
+struct rte_mbuf *mbuf = CONST_CAST(struct rte_mbuf *, >mbuf);
+struct rte_mbuf *tmbuf = rte_pktmbuf_lastseg(mbuf);
+
+if (delta < 0) {
+ovs_assert(-delta <= tmbuf->data_len);
+} else {
+ovs_assert(delta < (mbuf->buf_len - mbuf->data_off));
+}
+
+/* Set the destination and source offsets to copy to */
+dst_ofs = delta;
+src_ofs = 0;
+
+/* Shift data from src mbuf and offset to dst mbuf and offset */
+dp_packet_mbuf_shift_(mbuf, dst_ofs, mbuf, src_ofs,
+  rte_pktmbuf_pkt_len(mbuf));
+
+/* Update mbufs' properties, and if using multi-segment mbufs, first and
+ * last mbuf's data_len also needs to be adjusted */
+mbuf->data_off = mbuf->data_off + dst_ofs;
+}
+#endif
+
 /* Shifts all of the data within the allocated space in 'b' by 'delta' bytes.
  * For example, a 'delta' of 1 would cause each byte of data to move one byte
  * forward (from address 'p' to 'p+1'), and a 'delta' of -1 would cause each
@@ -306,6 +397,12 @@ dp_packet_shift(struct dp_packet *b, int delta)
: true);
 
 if (delta != 0) {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+dp_packet_mbuf_shift(b, delta);
+return;
+}
+#endif
 char *dst = (char *) dp_packet_data(b) + delta;
 memmove(dst, dp_packet_data(b), dp_packet_size(b));
 dp_packet_set_data(b, dst);
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 14e2551..6ca4e98 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -80,6 +80,11 @@ struct dp_packet {
 };
 };
 
+#ifdef DPDK_NETDEV
+#define MBUF_BUF_END(BUF_ADDR, BUF_LEN) \
+(char *) (((char *) BUF_ADDR) 

[ovs-dev] [PATCH v6 06/13] dp-packet: Handle multi-seg mbufs in helper funcs.

2018-07-24 Thread Tiago Lam
Most helper functions in dp-packet assume that the data held by a
dp_packet is contiguous, and perform operations such as pointer
arithmetic under that assumption. However, with the introduction of
multi-segment mbufs, where data is non-contiguous, such assumptions are
no longer possible. Some examples of such helper functions are
dp_packet_tail(), dp_packet_tailroom(), dp_packet_end(),
dp_packet_get_allocated() and dp_packet_at().

Thus, instead of assuming contiguous data in dp_packet, they now
iterate over the (non-contiguous) data in mbufs to perform their
calculations.

Finally, dp_packet_use__() has also been modified to perform the
initialisation of the packet (and setting the source) before continuing
to set its size and data length, which now depends on the type of
packet.

Co-authored-by: Mark Kavanagh 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c |   4 +-
 lib/dp-packet.h | 114 +---
 2 files changed, 111 insertions(+), 7 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 782e7c2..2aaeaae 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -41,11 +41,11 @@ static void
 dp_packet_use__(struct dp_packet *b, void *base, size_t allocated,
  enum dp_packet_source source)
 {
+dp_packet_init__(b, allocated, source);
+
 dp_packet_set_base(b, base);
 dp_packet_set_data(b, base);
 dp_packet_set_size(b, 0);
-
-dp_packet_init__(b, allocated, source);
 }
 
 /* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index d2803af..14e2551 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -185,9 +185,25 @@ dp_packet_delete(struct dp_packet *b)
 static inline void *
 dp_packet_at(const struct dp_packet *b, size_t offset, size_t size)
 {
-return offset + size <= dp_packet_size(b)
-   ? (char *) dp_packet_data(b) + offset
-   : NULL;
+if (offset + size > dp_packet_size(b)) {
+return NULL;
+}
+
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >mbuf);
+
+while (buf && offset > buf->data_len) {
+offset -= buf->data_len;
+
+buf = buf->next;
+}
+
+return buf ? rte_pktmbuf_mtod_offset(buf, char *, offset) : NULL;
+}
+#endif
+
+return (char *) dp_packet_data(b) + offset;
 }
 
 /* Returns a pointer to byte 'offset' in 'b', which must contain at least
@@ -196,13 +212,23 @@ static inline void *
 dp_packet_at_assert(const struct dp_packet *b, size_t offset, size_t size)
 {
 ovs_assert(offset + size <= dp_packet_size(b));
-return ((char *) dp_packet_data(b)) + offset;
+return dp_packet_at(b, offset, size);
 }
 
 /* Returns a pointer to byte following the last byte of data in use in 'b'. */
 static inline void *
 dp_packet_tail(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, >mbuf);
+/* Find last segment where data ends, meaning the tail of the chained
+ *  mbufs must be there */
+buf = rte_pktmbuf_lastseg(buf);
+
+return rte_pktmbuf_mtod_offset(buf, void *, buf->data_len);
+}
+#endif
 return (char *) dp_packet_data(b) + dp_packet_size(b);
 }
 
@@ -211,6 +237,15 @@ dp_packet_tail(const struct dp_packet *b)
 static inline void *
 dp_packet_end(const struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *buf = CONST_CAST(struct rte_mbuf *, &(b->mbuf));
+
+buf = rte_pktmbuf_lastseg(buf);
+
+return (char *) buf->buf_addr + buf->buf_len;
+}
+#endif
 return (char *) dp_packet_base(b) + dp_packet_get_allocated(b);
 }
 
@@ -236,6 +271,15 @@ dp_packet_tailroom(const struct dp_packet *b)
 static inline void
 dp_packet_clear(struct dp_packet *b)
 {
+#ifdef DPDK_NETDEV
+if (b->source == DPBUF_DPDK) {
+/* sets pkt_len and data_len to zero and frees unused mbufs */
+dp_packet_set_size(b, 0);
+rte_pktmbuf_reset(>mbuf);
+
+return;
+}
+#endif
 dp_packet_set_data(b, dp_packet_base(b));
 dp_packet_set_size(b, 0);
 }
@@ -252,12 +296,33 @@ dp_packet_pull(struct dp_packet *b, size_t size)
 return data;
 }
 
+#ifdef DPDK_NETDEV
+/* Similar to dp_packet_try_pull() but doesn't actually pull any data, only
+ * checks if it could and returns true or false accordingly.
+ *
+ * Valid for dp_packets carrying mbufs only. */
+static inline bool
+dp_packet_mbuf_may_pull(const struct dp_packet *b, size_t size) {
+if (size > b->mbuf.data_len) {
+return false;
+}
+
+return true;
+}
+#endif
+
 /* If 'b' has at least 'size' bytes of data, removes that many bytes from the
  * head end of 'b' and returns the first byte removed.  Otherwise, returns a
  * null pointer without modifying 

[ovs-dev] [PATCH v6 05/13] dp-packet: Fix data_len handling multi-seg mbufs.

2018-07-24 Thread Tiago Lam
When a dp_packet is from a DPDK source, and it contains multi-segment
mbufs, the data_len is not equal to the packet size, pkt_len. Instead,
the data_len of each mbuf in the chain should be considered while
distributing the new (provided) size.

To account for the above dp_packet_set_size() has been changed so that,
in the multi-segment mbufs case, only the data_len on the last mbuf of
the chain and the total size of the packet, pkt_len, are changed. The
data_len on the intermediate mbufs preceding the last mbuf is not
changed by dp_packet_set_size(). Furthermore, in some cases
dp_packet_set_size() may be used to set a smaller size than the current
packet size, thus effectively trimming the end of the packet. In the
multi-segment mbufs case this may lead to lingering mbufs that may need
freeing.

__packet_set_data() now also updates an mbuf's data_len after setting
the data offset. This is so that both fields are always in sync for each
mbuf in a chain.

Co-authored-by: Michael Qiu 
Co-authored-by: Mark Kavanagh 
Co-authored-by: Przemyslaw Lal 
Co-authored-by: Marcin Ksiadz 
Co-authored-by: Yuanhan Liu 

Signed-off-by: Michael Qiu 
Signed-off-by: Mark Kavanagh 
Signed-off-by: Przemyslaw Lal 
Signed-off-by: Marcin Ksiadz 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.h | 76 -
 1 file changed, 64 insertions(+), 12 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 6376039..d2803af 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -429,17 +429,49 @@ dp_packet_size(const struct dp_packet *b)
 static inline void
 dp_packet_set_size(struct dp_packet *b, uint32_t v)
 {
-/* netdev-dpdk does not currently support segmentation; consequently, for
- * all intents and purposes, 'data_len' (16 bit) and 'pkt_len' (32 bit) may
- * be used interchangably.
- *
- * On the datapath, it is expected that the size of packets
- * (and thus 'v') will always be <= UINT16_MAX; this means that there is no
- * loss of accuracy in assigning 'v' to 'data_len'.
- */
-b->mbuf.data_len = (uint16_t)v;  /* Current seg length. */
-b->mbuf.pkt_len = v; /* Total length of all segments linked to
-  * this segment. */
+if (b->source == DPBUF_DPDK) {
+struct rte_mbuf *mbuf = >mbuf;
+uint16_t new_len = v;
+uint16_t data_len;
+uint16_t nb_segs = 0;
+uint16_t pkt_len = 0;
+
+/* Trim 'v' length bytes from the end of the chained buffers, freeing
+   any buffers that may be left floating */
+while (mbuf) {
+data_len = MIN(new_len, mbuf->data_len);
+mbuf->data_len = data_len;
+
+if (new_len - data_len <= 0) {
+/* Free the rest of chained mbufs */
+free_dpdk_buf(CONTAINER_OF(mbuf->next, struct dp_packet,
+   mbuf));
+mbuf->next = NULL;
+} else if (!mbuf->next) {
+/* Don't assign more than what we have available */
+mbuf->data_len = MIN(new_len,
+ mbuf->buf_len - mbuf->data_off);
+}
+
+new_len -= data_len;
+nb_segs += 1;
+pkt_len += mbuf->data_len;
+mbuf = mbuf->next;
+}
+
+/* pkt_len != v would effectively mean that pkt_len < than 'v' (as
+ * being bigger is logically impossible). Being < than 'v' would mean
+ * the 'v' provided was bigger than the available room, which is the
+ * responsibility of the caller to make sure there is enough room */
+ovs_assert(pkt_len == v);
+
+b->mbuf.nb_segs = nb_segs;
+b->mbuf.pkt_len = pkt_len;
+} else {
+b->mbuf.data_len = v;
+/* Total length of all segments linked to this segment. */
+b->mbuf.pkt_len = v;
+}
 }
 
 static inline uint16_t
@@ -451,7 +483,27 @@ __packet_data(const struct dp_packet *b)
 static inline void
 __packet_set_data(struct dp_packet *b, uint16_t v)
 {
-b->mbuf.data_off = v;
+if (b->source == DPBUF_DPDK) {
+/* Moving data_off away from the first mbuf in the chain is not a
+ * possibility using DPBUF_DPDK dp_packets */
+ovs_assert(v == UINT16_MAX || v <= b->mbuf.buf_len);
+
+uint16_t prev_ofs = b->mbuf.data_off;
+b->mbuf.data_off = v;
+int16_t ofs_diff = prev_ofs - b->mbuf.data_off;
+
+/* When dealing with DPDK mbufs, keep data_off and data_len in sync.
+ * Thus, update data_len if the length changes with the move of
+ * data_off. However, if data_len is 0, there's no data to move and
+ * data_len should remain 0. */
+
+if (b->mbuf.data_len != 0) {
+b->mbuf.data_len = MIN(b->mbuf.data_len + ofs_diff,
+   b->mbuf.buf_len - 

[ovs-dev] [PATCH v6 01/13] netdev-dpdk: fix mbuf sizing

2018-07-24 Thread Tiago Lam
From: Mark Kavanagh 

There are numerous factors that must be considered when calculating
the size of an mbuf:
- the data portion of the mbuf must be sized in accordance with Rx
  buffer alignment (typically 1024B). So, for example, in order to
  successfully receive and capture a 1500B packet, mbufs with a
  data portion of size 2048B must be used.
- in OvS, the elements that comprise an mbuf are:
  * the dp packet, which includes a struct rte mbuf (704B)
  * RTE_PKTMBUF_HEADROOM (128B)
  * packet data (aligned to 1k, as previously described)
  * RTE_PKTMBUF_TAILROOM (typically 0)

Some PMDs require that the total mbuf size (i.e. the total sum of all
of the above-listed components' lengths) is cache-aligned. To satisfy
this requirement, it may be necessary to round up the total mbuf size
with respect to cacheline size. In doing so, it's possible that the
dp_packet's data portion is inadvertently increased in size, such that
it no longer adheres to Rx buffer alignment. Consequently, the
following property of the mbuf no longer holds true:

mbuf.data_len == mbuf.buf_len - mbuf.data_off

This creates a problem in the case of multi-segment mbufs, where that
assumption is assumed to be true for all but the final segment in an
mbuf chain. Resolve this issue by adjusting the size of the mbuf's
private data portion, as opposed to the packet data portion when
aligning mbuf size to cachelines.

Fixes: 4be4d22 ("netdev-dpdk: clean up mbuf initialization")
Fixes: 31b88c9 ("netdev-dpdk: round up mbuf_size to cache_line_size")
CC: Santosh Shukla 
Signed-off-by: Mark Kavanagh 
Acked-by: Santosh Shukla 
Acked-by: Eelco Chaudron 
---
 lib/netdev-dpdk.c | 56 +--
 1 file changed, 38 insertions(+), 18 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index bb4d60f..949b87b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -88,10 +88,6 @@ static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
20);
 #define MTU_TO_MAX_FRAME_LEN(mtu)   ((mtu) + ETHER_HDR_MAX_LEN)
 #define FRAME_LEN_TO_MTU(frame_len) ((frame_len)\
  - ETHER_HDR_LEN - ETHER_CRC_LEN)
-#define MBUF_SIZE(mtu)  ROUND_UP((MTU_TO_MAX_FRAME_LEN(mtu) \
- + sizeof(struct dp_packet) \
- + RTE_PKTMBUF_HEADROOM),   \
- RTE_CACHE_LINE_SIZE)
 #define NETDEV_DPDK_MBUF_ALIGN  1024
 #define NETDEV_DPDK_MAX_PKT_LEN 9728
 
@@ -637,7 +633,11 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 char mp_name[RTE_MEMPOOL_NAMESIZE];
 const char *netdev_name = netdev_get_name(>up);
 int socket_id = dev->requested_socket_id;
-uint32_t n_mbufs;
+uint32_t n_mbufs = 0;
+uint32_t mbuf_size = 0;
+uint32_t aligned_mbuf_size = 0;
+uint32_t mbuf_priv_data_len = 0;
+uint32_t pkt_size = 0;
 uint32_t hash = hash_string(netdev_name, 0);
 struct dpdk_mp *dmp = NULL;
 int ret;
@@ -650,6 +650,9 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 dmp->mtu = mtu;
 dmp->refcount = 1;
 
+/* Get the size of each mbuf, based on the MTU */
+mbuf_size = dpdk_buf_size(dev->requested_mtu);
+
 n_mbufs = dpdk_calculate_mbufs(dev, mtu, per_port_mp);
 
 do {
@@ -661,8 +664,8 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
  * so this is not an issue for tasks such as debugging.
  */
 ret = snprintf(mp_name, RTE_MEMPOOL_NAMESIZE,
-   "ovs%08x%02d%05d%07u",
-   hash, socket_id, mtu, n_mbufs);
+   "ovs%08x%02d%05d%07u",
+hash, socket_id, mtu, n_mbufs);
 if (ret < 0 || ret >= RTE_MEMPOOL_NAMESIZE) {
 VLOG_DBG("snprintf returned %d. "
  "Failed to generate a mempool name for \"%s\". "
@@ -671,17 +674,34 @@ dpdk_mp_create(struct netdev_dpdk *dev, int mtu, bool 
per_port_mp)
 break;
 }
 
-VLOG_DBG("Port %s: Requesting a mempool of %u mbufs "
-  "on socket %d for %d Rx and %d Tx queues.",
-  netdev_name, n_mbufs, socket_id,
-  dev->requested_n_rxq, dev->requested_n_txq);
-
-dmp->mp = rte_pktmbuf_pool_create(mp_name, n_mbufs,
-  MP_CACHE_SZ,
-  sizeof (struct dp_packet)
-  - sizeof (struct rte_mbuf),
-  MBUF_SIZE(mtu)
-  - sizeof(struct dp_packet),
+VLOG_DBG("Port %s: Requesting a mempool of %u mbufs of size %u "
+  "on socket %d for %d Rx and %d Tx queues, "
+  "cache line size of %u",
+  netdev_name, n_mbufs, 

[ovs-dev] [PATCH v6 03/13] dp-packet: Fix allocated size on DPDK init.

2018-07-24 Thread Tiago Lam
When enabled with DPDK OvS deals with two types of packets, the ones
coming from the mempool and the ones locally created by OvS - which are
copied to mempool mbufs before output. In the latter, the space is
allocated from the system, while in the former the mbufs are allocated
from a mempool, which takes care of initialising them appropriately.

In the current implementation, during mempool's initialisation of mbufs,
dp_packet_set_allocated() is called from dp_packet_init_dpdk() without
considering that the allocated space, in the case of multi-segment
mbufs, might be greater than a single mbuf.  Furthermore, given that
dp_packet_init_dpdk() is on the code path that's called upon mempool's
initialisation, a call to dp_packet_set_allocated() is redundant, since
mempool takes care of initialising it.

To fix this, dp_packet_set_allocated() is no longer called after
initialisation of a mempool, only in dp_packet_init__(), which is still
called by OvS when initialising locally created packets.

Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.c   | 3 +--
 lib/dp-packet.h   | 2 +-
 lib/netdev-dpdk.c | 2 +-
 3 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 443c225..782e7c2 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -99,9 +99,8 @@ dp_packet_use_const(struct dp_packet *b, const void *data, 
size_t size)
  * buffer.  Here, non-transient ovs dp-packet fields are initialized for
  * packets that are part of a DPDK memory pool. */
 void
-dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
+dp_packet_init_dpdk(struct dp_packet *b)
 {
-dp_packet_set_allocated(b, allocated);
 b->source = DPBUF_DPDK;
 }
 
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index b948fe1..6376039 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -114,7 +114,7 @@ void dp_packet_use(struct dp_packet *, void *, size_t);
 void dp_packet_use_stub(struct dp_packet *, void *, size_t);
 void dp_packet_use_const(struct dp_packet *, const void *, size_t);
 
-void dp_packet_init_dpdk(struct dp_packet *, size_t allocated);
+void dp_packet_init_dpdk(struct dp_packet *);
 
 void dp_packet_init(struct dp_packet *, size_t);
 void dp_packet_uninit(struct dp_packet *);
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 949b87b..15d5479 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -550,7 +550,7 @@ ovs_rte_pktmbuf_init(struct rte_mempool *mp OVS_UNUSED,
 {
 struct rte_mbuf *pkt = _p;
 
-dp_packet_init_dpdk((struct dp_packet *) pkt, pkt->buf_len);
+dp_packet_init_dpdk((struct dp_packet *) pkt);
 }
 
 static int
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 02/13] dp-packet: Init specific mbuf fields.

2018-07-24 Thread Tiago Lam
From: Mark Kavanagh 

dp_packets are created using xmalloc(); in the case of OvS-DPDK, it's
possible that the resultant mbuf portion of the dp_packet contains
random data. For some mbuf fields, specifically those related to
multi-segment mbufs and/or offload features, random values may cause
unexpected behaviour, should the dp_packet's contents be later copied
to a DPDK mbuf. It is critical therefore, that these fields should be
initialized to 0.

This patch ensures that the following mbuf fields are initialized to
appropriate values on creation of a new dp_packet:
   - ol_flags=0
   - nb_segs=1
   - tx_offload=0
   - packet_type=0
   - next=NULL

Adapted from an idea by Michael Qiu :
https://patchwork.ozlabs.org/patch/777570/

Co-authored-by: Tiago Lam 

Signed-off-by: Mark Kavanagh 
Signed-off-by: Tiago Lam 
Acked-by: Eelco Chaudron 
---
 lib/dp-packet.h | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index ba91e58..b948fe1 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -625,14 +625,15 @@ dp_packet_mbuf_rss_flag_reset(struct dp_packet *p 
OVS_UNUSED)
 }
 
 /* This initialization is needed for packets that do not come
- * from DPDK interfaces, when vswitchd is built with --with-dpdk.
- * The DPDK rte library will still otherwise manage the mbuf.
- * We only need to initialize the mbuf ol_flags. */
+ * from DPDK interfaces, when vswitchd is built with --with-dpdk. */
 static inline void
 dp_packet_mbuf_init(struct dp_packet *p OVS_UNUSED)
 {
 #ifdef DPDK_NETDEV
-p->mbuf.ol_flags = 0;
+struct rte_mbuf *mbuf = &(p->mbuf);
+mbuf->ol_flags = mbuf->tx_offload = mbuf->packet_type = 0;
+mbuf->nb_segs = 1;
+mbuf->next = NULL;
 #endif
 }
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 00/13] Support multi-segment mbufs

2018-07-24 Thread Tiago Lam
Overview

This patchset introduces support for multi-segment mbufs to OvS-DPDK.
Multi-segment mbufs are typically used when the size of an mbuf is
insufficient to contain the entirety of a packet's data. Instead, the
data is split across numerous mbufs, each carrying a portion, or
'segment', of the packet data. mbufs are chained via their 'next'
attribute (an mbuf pointer).

Use Cases
=
i.  Handling oversized (guest-originated) frames, which are marked
for hardware acceleration/offload (TSO, for example).

Packets which originate from a non-DPDK source may be marked for
offload; as such, they may be larger than the permitted ingress
interface's MTU, and may be stored in an oversized dp-packet. In
order to transmit such packets over a DPDK port, their contents
must be copied to a DPDK mbuf (via dpdk_do_tx_copy). However, in
its current implementation, that function only copies data into
a single mbuf; if the space available in the mbuf is exhausted,
but not all packet data has been copied, then it is lost.
Similarly, when cloning a DPDK mbuf, it must be considered
whether that mbuf contains multiple segments. Both issues are
resolved within this patchset.

ii. Handling jumbo frames.

While OvS already supports jumbo frames, it does so by increasing
mbuf size, such that the entirety of a jumbo frame may be handled
in a single mbuf. This is certainly the preferred, and most
performant approach (and remains the default).

Enabling multi-segment mbufs

Multi-segment and single-segment mbufs are mutually exclusive, and the
user must decide on which approach to adopt on init. The introduction
of a new OVSDB field, 'dpdk-multi-seg-mbufs', facilitates this.

This is a global boolean value, which determines how jumbo frames are
represented across all DPDK ports. In the absence of a user-supplied
value, 'dpdk-multi-seg-mbufs' defaults to false, i.e. multi-segment
mbufs must be explicitly enabled / single-segment mbufs remain the
default.

Setting the field is identical to setting existing DPDK-specific OVSDB
fields:

ovs-vsctl set Open_vSwitch . other_config:dpdk-init=true
ovs-vsctl set Open_vSwitch . other_config:dpdk-lcore-mask=0x10
ovs-vsctl set Open_vSwitch . other_config:dpdk-socket-mem=4096,0
==> ovs-vsctl set Open_vSwitch . other_config:dpdk-multi-seg-mbufs=true

Performance notes (based on v8)
=
In order to test for regressions in performance, tests were run on top
of master 88125d6 and v8 of this patchset, both with the multi-segment
mbufs option enabled and disabled.

VSperf was used to run the phy2phy_cont and pvp_cont tests with varying
packet sizes of 64B, 1500B and 7000B, on a 10Gbps interface.

Test | Size | Master | Multi-seg disabled | Multi-seg enabled
-
p2p  |  64  | ~22.7  |  ~22.65|   ~18.3
p2p  | 1500 |  ~1.6  |~1.6|~1.6
p2p  | 7000 | ~0.36  |   ~0.36|   ~0.36
pvp  |  64  |  ~6.7  |~6.7|~6.3
pvp  | 1500 |  ~1.6  |~1.6|~1.6
pvp  | 7000 | ~0.36  |   ~0.36|   ~0.36

Packet size is in bytes, while all packet rates are reported in mpps
(aggregated).

No noticeable regression has been observed (certainly everything is
within the ± 5% margin of existing performance), aside from the 64B
packet size case when multi-segment mbuf is enabled. This is
expected, however, because of how Tx vectorized functions are
incompatible with multi-segment mbufs on some PMDs. The PMD under
use during these tests was the i40e (on an Intel X710 NIC), which
indeed doesn't support vectorized Tx functions with multi-segment
mbufs.

---
v6: - Rebase on master d1b235d ("tests: Add test for ovn-nbctl's command parser
  error paths.");
- Address Darrell's comments:
  - The changes in dp_packet_resize__() were trying to alleviate the call
to OVS_NOT_REACHED() for DPDK packets, by trying to reuse the available
tailroom space when no more headroom space is available, and vice-versa.
However, this was breaking the API for the dp_packet_resize__()
function (only called in dp_packet_prealloc_tailroom() and
dp_packet_prealloc_headroom()), which doesn't seem to suit the purposes
for DPDK packets.
Instead, and because this is isolated functionality, revert to the
previous state where dp_packet_resize__() is not supported for DPDK
packets. Hence, patch 08/14 has been dropped.
- Additionally, fix the tests that were relying on the removed
  functionality.

v5: - Rebase on master 030958a0cc ("conntrack: Fix conn_update_state_alg use
  after free.");
- Address Eelco's comments:
  - Remove dpdk_mp_sweep() call in netdev_dpdk_mempool_configure(), a
leftover from rebase. Only call should be in dpdk_mp_get();
  - Remove 

[ovs-dev] [PATCH] NEWS: Mention daemon mode support for ovn-nbctl.

2018-07-24 Thread Jakub Sitnicki
Signed-off-by: Jakub Sitnicki 
---
 NEWS | 4 
 1 file changed, 4 insertions(+)

diff --git a/NEWS b/NEWS
index 057e8bfd3..2414b9199 100644
--- a/NEWS
+++ b/NEWS
@@ -31,6 +31,10 @@ Post-v2.9.0
  * ACL match conditions can now match on Port_Groups as well as address
sets that are automatically generated by Port_Groups.  ACLs can be
applied directly to Port_Groups as well.
+ * ovn-nbctl can now run as a daemon (long-lived, background process)
+   running commands in response to JSON-RPC requests received over a UNIX
+   socket. Requests to run commands can be sent using ovs-appctl tool, same
+   as for any other OVS/OVN daemon. See ovn-nbctl(8) for details.
- DPDK:
  * New 'check-dpdk' Makefile target to run a new system testsuite.
See Testing topic for the details.
-- 
2.14.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 00/21] Daemon mode for ovn-nbctl

2018-07-24 Thread Jakub Sitnicki
On Mon, 23 Jul 2018 16:47:52 -0700
Ben Pfaff  wrote:

> On Thu, Jul 19, 2018 at 03:51:05PM +0200, Jakub Sitnicki wrote:
> > This series extends ovn-nbctl tool with support for the daemon mode, where
> > ovn-nbctl acts as a long-lived process that accepts commands over a UNIX 
> > socket.
> > The daemon can be started the same way as any other OVS/OVN server:
> > 
> >   ovn-nbctl --detach --pidfile --log-file  
> 
> Thanks a lot.  I applied this series to master.
> 
> Would you mind sending an additional patch to add an appropriate item to
> NEWS?

With pleasure.
 
> I'm going to play with some ideas for tests.

Thanks, I don't have it figured out yet.

-Jakub

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 10/11] ovn-nbctl: Remove pointless "return; " at ends of functions.

2018-07-24 Thread Jakub Sitnicki
On Mon, 23 Jul 2018 15:38:55 -0700
Ben Pfaff  wrote:

> On Tue, Jul 17, 2018 at 03:34:14PM +0200, Jakub Sitnicki wrote:
> > Fix fall-out from applying a semantic patch that converts ctl_fatal()
> > calls to use ctl_error().
> > 
> > Signed-off-by: Jakub Sitnicki   
> 
> Oh, you fixed that up later ;-).  Ha, I'll revert it to your way
> instead, never mind.

Yes, I've decided to do fix-ups separately so that the
Coccinelle-generated patches could be reviewed based on the semantic
patch. Not sure if that worked out fine or just created confusion.

-Jakub
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 08/11] ovn-nbctl: Propagate error thru the context.

2018-07-24 Thread Jakub Sitnicki
On Mon, 23 Jul 2018 15:36:28 -0700
Ben Pfaff  wrote:

> On Tue, Jul 17, 2018 at 03:34:12PM +0200, Jakub Sitnicki wrote:
> > Instead of dying let the main loop handle the error.
> > This will allow us to report errors when running in daemon mode.
> > 
> > This is a result of applying the following semantic patch:
> > 
> > @@
> > identifier F;
> > identifier C;
> > identifier E;
> > @@
> >   static void F(struct ctl_context *C) {
> > <...
> >   if (E) {
> > - ctl_fatal("%s", E);
> > + C->error = E;
> > + return;
> >   }  
> > ...>  
> >   }
> > 
> > Signed-off-by: Jakub Sitnicki   
> 
> It's too bad that this generates code like this:
> 
>  char *error = ls_by_name_or_uuid(ctx, ctx->argv[1], false, );
>  if (error) {
> ctx->error = error;
> return;
>  }
> 
> instead of the simpler:
> 
>  ctx->error = ls_by_name_or_uuid(ctx, ctx->argv[1], false, );
>  if (ctx->error) {
> return;
>  }
> 
> Oh well.

Let me see if I can come up with a semantic patch that corrects that.

-Jakub
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 06/11] ovn-nbctl: Don't die in parse_direction().

2018-07-24 Thread Jakub Sitnicki
On Mon, 23 Jul 2018 15:33:26 -0700
Ben Pfaff  wrote:

> On Tue, Jul 17, 2018 at 03:34:10PM +0200, Jakub Sitnicki wrote:
> > Let the caller handle the error. This prepares us for reporting errors
> > in daemon mode.
> > 
> > Signed-off-by: Jakub Sitnicki   
> 
> I got a lot of "possibly uninitialized" warnings from GCC for this one:
> 
> ../ovn/utilities/ovn-nbctl.c: In function ‘nbctl_qos_del’:
> ../ovn/utilities/ovn-nbctl.c:2068:17: error: ‘direction’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
>  if (strcmp(direction, ls->qos_rules[i]->direction)) {
>  ^~
> ../ovn/utilities/ovn-nbctl.c: In function ‘nbctl_acl_add’:
> ../ovn/utilities/ovn-nbctl.c:1739:5: error: ‘direction’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
>  nbrec_acl_set_direction(acl, direction);
>  ^~~
> ../ovn/utilities/ovn-nbctl.c: In function ‘nbctl_acl_del’:
> ../ovn/utilities/ovn-nbctl.c:1829:17: error: ‘direction’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
>  if (strcmp(direction, acls[i]->direction)) {
>  ^
> ../ovn/utilities/ovn-nbctl.c: In function ‘nbctl_qos_add’:
> ../ovn/utilities/ovn-nbctl.c:1994:5: error: ‘direction’ may be used 
> uninitialized in this function [-Werror=maybe-uninitialized]
>  nbrec_qos_set_direction(qos, direction);
>  ^~~
> ../ovn/utilities/ovn-nbctl.c: At top level:
> 
> I think it's wrong but the following incremental solved it so I folded
> it in:
> 
> diff --git a/ovn/utilities/ovn-nbctl.c b/ovn/utilities/ovn-nbctl.c
> index c18fa28256af..a4a533740cb9 100644
> --- a/ovn/utilities/ovn-nbctl.c
> +++ b/ovn/utilities/ovn-nbctl.c
> @@ -1680,6 +1680,7 @@ parse_direction(const char *arg, const char 
> **direction_p)
>  } else if (arg[0] == 'f') {
>  *direction_p = "from-lport";
>  } else {
> +*direction_p = NULL;
>  return xasprintf("%s: direction must be \"to-lport\" or "
>   "\"from-lport\"", arg);
>  }
> 

Thank you for fixing it up. It's an oversight on my side, I've only
build tested with Clang. Wondering why 0-day bot wasn't complaining.
Maybe it's just newer versions of GCC that detect this.

Thanks,
Jakub
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Reply Urgent.

2018-07-24 Thread Mr. Hameto Sani
Dear Friend,

 (TREAT THIS MATTER CONFIDENTIAL)

I am a banker by profession, I want to release an abandoned sum of ($ 25.3 
Million Dollars) to you for my future investment. Can you provide me with an 
account to receive the sum. you will take 40% as your share.

Please send me your information before proceeding.

1) Your Names:
2) Your Age:
3) Your Address:
4) Your occupation:
5) Your Contact Phone:
6) Your Country:

I need thesis information to know you very well before we proceed because i 
will resign from this bank your immediate confirmed this fund in your position

I wait for your reply as soon as possible.
Thanks,

Mr.Hameto Sani.
Reply to:  mrhamet...@gmail.com
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Improved Packet Drop Statistics in OVS.

2018-07-24 Thread Keshav Gupta
Hi Federico and all
 I will work on this Rohitha's patch to take it forward.

Thanks
Keshav

-Original Message-
From: ovs-dev-boun...@openvswitch.org [mailto:ovs-dev-boun...@openvswitch.org] 
On Behalf Of Federico Iezzi
Sent: Tuesday, July 24, 2018 1:23 PM
To: ian.sto...@intel.com
Cc: d...@openvswitch.org
Subject: Re: [ovs-dev] [PATCH] Improved Packet Drop Statistics in OVS.

Hi all,

It seems Rohith Basavaraja email has been disabled.
I believe it's too late for someone else to pick up Rohith's work, right?

Federico

On Mon, 11 Jun 2018 at 10:33, Stokes, Ian  wrote:

> > I'm very sorry for double posting.
> > I believe this patch would represent a major feature in term of 
> > OVS's usability.
> > Has anyone got any time for reviewing it?
> >
>
> I'm planning to look at it this week.
>
> Thanks
> Ian
>
> > Thanks,
> > Federico
> >
> > On 6 June 2018 at 17:37, Jan Scheurich 
> wrote:
> >
> > > The user-space part for packet drop stats should be generic and 
> > > work with any dpif datapath.
> > > So, if someone implemented the equivalent drop stats functionality 
> > > in the kernel datapath that would be very welcome.
> > > We in Ericsson cannot do that currently due to license restrictions.
> > >
> > > Regards, Jan
> > >
> > > > -Original Message-
> > > > From: ovs-dev-boun...@openvswitch.org 
> > > > 
> > > On Behalf Of Rohith Basavaraja
> > > > Sent: Friday, 25 May, 2018 07:37
> > > > To: Ben Pfaff 
> > > > Cc: d...@openvswitch.org
> > > > Subject: Re: [ovs-dev] [PATCH] Improved Packet Drop Statistics 
> > > > in
> OVS.
> > > >
> > > > Thanks Ben for the clarification. Yes this new stuff is used 
> > > > only in the DPDK datapath and it’s not used in the kernel datapath.
> > > >
> > > > Thanks
> > > > Rohith
> > > >
> > > > On 25/05/18, 2:52 AM, "Ben Pfaff"  wrote:
> > > >
> > > > On Thu, May 24, 2018 at 02:19:06AM +, Rohith Basavaraja
> wrote:
> > > > > Only  changes in
> > > > > datapath/linux/compat/include/linux/openvswitch.h
> > > > > are related to OvS Kernel module.
> > > >
> > > > On a second look, I see that the new stuff here is only for 
> > > > the
> > DPDK
> > > > datapath.  If you don't intend to add this feature to the kernel
> > > > datapath, there should be no problem.  Never mind.
> > > >
> > > >
> > > > ___
> > > > dev mailing list
> > > > d...@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > ___
> > > dev mailing list
> > > d...@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Number of queues at egress port available in OVS !

2018-07-24 Thread rakesh kumar
Hello All,

How many queues are available at egress port in OVS, How do we queue the
packets on the basis of PCP field i.e 3 bit in 802.1Q header ?



Regards
Rakesh Kumar
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Stokes, Ian
> On 20/07/2018 16:56, Ian Stokes wrote:
> > Hi Ben,
> >
> > The following changes since commit
> > 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> >
> >build: Add gitattribute file to build-aux (2018-07-19 21:02:33
> > +0300)
> >
> > are available in the git repository at:
> >
> >https://github.com/istokes/ovs dpdk_merge
> >
> > for you to fetch changes up to 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> >
> >sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> >
> 
> Hi Ian,
> 
> After some discussions with Darrell around patch 8/14 of the "Support
> multi-segment mbufs​" patchset, I'll be sending a v6 of that series with
> that patch removed.
> 
> Could that be included in this PR (meaning the previously included v5 of
> the series would be dropped entirely), since it still hasn't been merged
> to master?
> 
> Thanks, and sorry for the trouble here.

No problem, it makes more sense for a new pull request, otherwise there would 
be a revert for patch 8. If you send out a v6 with the required patch removed 
and any other changes (which are small I would think) I can review and apply to 
a new pull request.

Ian

> 
> Tiago.
> 
> > 
> > Ben Pfaff (4):
> >netdev-dpdk: Fix incorrect byte order conversion in log message.
> >netdev-dpdk: Fix sparse complaints.
> >netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it.
> >sparse: Add support for DPDK.
> >
> > Ian Stokes (1):
> >Docs: Improve OVS DPDK version mapping notice.
> >
> > Mark Kavanagh (4):
> >netdev-dpdk: fix mbuf sizing
> >dp-packet: Init specific mbuf fields.
> >netdev-dpdk: copy large packet to multi-seg. mbufs
> >netdev-dpdk: support multi-segment jumbo frames
> >
> > Michael Qiu (1):
> >dp-packet: copy data from multi-seg. DPDK mbuf
> >
> > Tiago Lam (9):
> >dp-packet: Fix allocated size on DPDK init.
> >netdev-dpdk: Serialise non-pmds mbufs' alloc/free.
> >dp-packet: Fix data_len handling multi-seg mbufs.
> >dp-packet: Handle multi-seg mbufs in helper funcs.
> >dp-packet: Handle multi-seg mubfs in shift() func.
> >dp-packet: Handle multi-seg mbufs in resize__().
> >dpdk-tests: Add uni-tests for multi-seg mbufs.
> >dpdk-tests: Accept other configs in OVS_DPDK_START
> >dpdk-tests: End-to-end tests for multi-seg mbufs.
> >
> > Yipeng Wang (1):
> >dpif-netdev: Add SMC cache after EMC cache
> >
> >   Documentation/howto/dpdk.rst   |6 +-
> >   Documentation/intro/install/dpdk.rst   |6 +-
> >   Documentation/topics/dpdk/bridge.rst   |   15 ++
> >   Documentation/topics/dpdk/jumbo-frames.rst |   52 +++
> >   Documentation/topics/dpdk/memory.rst   |   36 +
> >   Makefile.am|2 +-
> >   NEWS   |3 +
> >   build-aux/initial-tab-whitelist|1 +
> >   include/sparse/automake.mk |9 ++
> >   include/sparse/rte_byteorder.h |  281
> > +++
> >   include/sparse/rte_esp.h   |   65 +
> >   include/sparse/rte_flow.h  | 1483
> >
> ++
> ++
> +
> >   include/sparse/rte_icmp.h  |  106 ++
> >   include/sparse/rte_ip.h|  490
> > +
> >   include/sparse/rte_sctp.h  |  103 +
> >   include/sparse/rte_tcp.h   |  108 ++
> >   include/sparse/rte_udp.h   |  103 +
> >   include/sparse/xmmintrin.h |   24 +++
> >   lib/cmap.c |   74 ++
> >   lib/cmap.h |   11 ++
> >   lib/dp-packet.c|  221
> > ++--
> >   lib/dp-packet.h|  214
> > ---
> >   lib/dpdk.c |8 +
> >   lib/dpif-netdev-perf.h |1 +
> >   lib/dpif-netdev.c  |  329
> > -
> >   lib/netdev-dpdk.c  |  270
> > +++---
> >   lib/netdev-dpdk.h  |2 +
> >   tests/automake.mk  |   10 +-
> >   tests/dpdk-packet-mbufs.at |7 +
> >   tests/pmd.at   |7 +-
> >   tests/system-dpdk-macros.at|6 +-
> >   tests/system-dpdk-testsuite.at |1 +
> >   tests/system-dpdk.at   |   65 +
> >   

Re: [ovs-dev] OVS DPDK: dpdk_merge pull request for master

2018-07-24 Thread Lam, Tiago
On 20/07/2018 16:56, Ian Stokes wrote:
> Hi Ben,
> 
> The following changes since commit 
> 3c921cc2b6b760bd0db73fd629ee9614edc8914c:
> 
>build: Add gitattribute file to build-aux (2018-07-19 21:02:33 +0300)
> 
> are available in the git repository at:
> 
>https://github.com/istokes/ovs dpdk_merge
> 
> for you to fetch changes up to 0e0e9e213f13be508d282ffefd7bbe8c680e4fc8:
> 
>sparse: Add support for DPDK. (2018-07-20 15:44:45 +0100)
> 

Hi Ian,

After some discussions with Darrell around patch 8/14 of the "Support
multi-segment mbufs" patchset, I'll be sending a v6 of that series with
that patch removed.

Could that be included in this PR (meaning the previously included v5 of
the series would be dropped entirely), since it still hasn't been merged
to master?

Thanks, and sorry for the trouble here.

Tiago.

> 
> Ben Pfaff (4):
>netdev-dpdk: Fix incorrect byte order conversion in log message.
>netdev-dpdk: Fix sparse complaints.
>netdev-dpdk: Use ETH_ADDR_BYTES_ARGS instead of open-coding it.
>sparse: Add support for DPDK.
> 
> Ian Stokes (1):
>Docs: Improve OVS DPDK version mapping notice.
> 
> Mark Kavanagh (4):
>netdev-dpdk: fix mbuf sizing
>dp-packet: Init specific mbuf fields.
>netdev-dpdk: copy large packet to multi-seg. mbufs
>netdev-dpdk: support multi-segment jumbo frames
> 
> Michael Qiu (1):
>dp-packet: copy data from multi-seg. DPDK mbuf
> 
> Tiago Lam (9):
>dp-packet: Fix allocated size on DPDK init.
>netdev-dpdk: Serialise non-pmds mbufs' alloc/free.
>dp-packet: Fix data_len handling multi-seg mbufs.
>dp-packet: Handle multi-seg mbufs in helper funcs.
>dp-packet: Handle multi-seg mubfs in shift() func.
>dp-packet: Handle multi-seg mbufs in resize__().
>dpdk-tests: Add uni-tests for multi-seg mbufs.
>dpdk-tests: Accept other configs in OVS_DPDK_START
>dpdk-tests: End-to-end tests for multi-seg mbufs.
> 
> Yipeng Wang (1):
>dpif-netdev: Add SMC cache after EMC cache
> 
>   Documentation/howto/dpdk.rst   |6 +-
>   Documentation/intro/install/dpdk.rst   |6 +-
>   Documentation/topics/dpdk/bridge.rst   |   15 ++
>   Documentation/topics/dpdk/jumbo-frames.rst |   52 +++
>   Documentation/topics/dpdk/memory.rst   |   36 +
>   Makefile.am|2 +-
>   NEWS   |3 +
>   build-aux/initial-tab-whitelist|1 +
>   include/sparse/automake.mk |9 ++
>   include/sparse/rte_byteorder.h |  281 
> +++
>   include/sparse/rte_esp.h   |   65 +
>   include/sparse/rte_flow.h  | 1483 
> +
>   include/sparse/rte_icmp.h  |  106 ++
>   include/sparse/rte_ip.h|  490 
> +
>   include/sparse/rte_sctp.h  |  103 +
>   include/sparse/rte_tcp.h   |  108 ++
>   include/sparse/rte_udp.h   |  103 +
>   include/sparse/xmmintrin.h |   24 +++
>   lib/cmap.c |   74 ++
>   lib/cmap.h |   11 ++
>   lib/dp-packet.c|  221 
> ++--
>   lib/dp-packet.h|  214 
> ---
>   lib/dpdk.c |8 +
>   lib/dpif-netdev-perf.h |1 +
>   lib/dpif-netdev.c  |  329 
> -
>   lib/netdev-dpdk.c  |  270 
> +++---
>   lib/netdev-dpdk.h  |2 +
>   tests/automake.mk  |   10 +-
>   tests/dpdk-packet-mbufs.at |7 +
>   tests/pmd.at   |7 +-
>   tests/system-dpdk-macros.at|6 +-
>   tests/system-dpdk-testsuite.at |1 +
>   tests/system-dpdk.at   |   65 +
>   tests/test-dpdk-mbufs.c|  518 
> +
>   vswitchd/vswitch.xml   |   35 +
>   35 files changed, 4532 insertions(+), 140 deletions(-)
>   create mode 100644 include/sparse/rte_byteorder.h
>   create mode 100644 include/sparse/rte_esp.h
>   create mode 100644 include/sparse/rte_flow.h
>   create mode 100644 include/sparse/rte_icmp.h
>   create mode 100644 

Re: [ovs-dev] [PATCH v4 0/4] Support dynamic rebalancing of offloaded flows

2018-07-24 Thread Sriharsha Basavapatna via dev
Hi Ben,

Just checking if you had a chance to look at v4 of this patch-set,
curious to know if you have any further comments.

Thanks,
-Harsha

On Thu, Jul 12, 2018 at 12:59 PM, Sriharsha Basavapatna
 wrote:
> With the current OVS offload design, when an offload-device fails to add a
> flow rule and returns an error, OVS adds the rule to the kernel datapath.
> The flow gets processed by the kernel datapath for the entire life of that
> flow. This is fine when an error is returned by the device due to lack of
> support for certain keys or actions.
>
> But when an error is returned due to temporary conditions such as lack of
> resources to add a flow rule, the flow continues to be processed by kernel
> even when resources become available later. That is, those flows never get
> offloaded again. This problem becomes more pronounced when a flow that has
> been initially offloaded may have a smaller packet rate than a later flow
> that could not be offloaded due to lack of resources. This leads to
> inefficient use of HW resources and wastage of host CPU cycles.
>
> This patch-set addresses this issue by providing a way to detect temporary
> offload resource constraints (Out-Of-Resource or OOR condition) and to
> selectively and dynamically offload flows with a higher packets-per-second
> (pps) rate. This dynamic rebalancing is done periodically on netdevs that
> are in OOR state until resources become available to offload all pending
> flows.
>
> The patch-set involves the following changes at a high level:
>
> 1. Detection of Out-Of-Resources (OOR) condition on an offload-capable
>netdev.
> 2. Gathering flow offload selection criteria for all flows on an OOR netdev;
>i.e., packets-per-second (pps) rate of flows for offloaded and
>non-offloaded (pending) flows.
> 3. Dynamically replacing offloaded flows with a lower pps-rate, with
>non-offloaded flows with a higher pps-rate, on an OOR netdev.
> 4. A new OpenvSwitch configuration option - "offload-rebalancing"
>to enable this policy.
>
> **
>
> v3-->v4:
>   - Updated parse_flow_put() with the following changes:
> - Fixed outdev memory leak with multiple output actions
> - Moved variables closer to their first use
> - Removed outdev check while setting oor, since indev is sufficient
>
> v2-->v3:
>   - Removed some VLOG_DBG() in patches 2 and 3
>   - Reworded a few VLOG_DBG() in patch 3
>   - Fixed a comment line in patch 3
>
> v1-->v2:
>   - Fixed build errors reported by 0-day robot
>   - Updated patch prefixes with relevant subsystem names
>
> **
>
> Sriharsha Basavapatna (4):
>   dpif-netlink: Detect Out-Of-Resource condition on a netdev
>   revalidator: Gather packets-per-second rate of flows
>   revalidator: Rebalance offloaded flows based on the pps rate
>   netdev: Add a configuration option to enable dynamic rebalancing of
> flows
>
>  lib/dpif-netdev.c |   3 +-
>  lib/dpif-netlink.c|  25 +-
>  lib/dpif-provider.h   |   8 +-
>  lib/dpif.c|  20 +-
>  lib/dpif.h|  20 +-
>  lib/flow.c|  27 ++
>  lib/flow.h|   1 +
>  lib/netdev-provider.h |   8 +
>  lib/netdev.c  |  45 +++
>  lib/netdev.h  |   2 +
>  ofproto/ofproto-dpif-upcall.c | 571 +-
>  vswitchd/vswitch.xml  |  22 ++
>  12 files changed, 732 insertions(+), 20 deletions(-)
>
> --
> 2.18.0.rc1.1.g6f333ff
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 0/4] Add support to offload QinQ

2018-07-24 Thread Simon Horman
On Tue, Jul 17, 2018 at 02:01:53AM +, Jianbo Liu wrote:
> This patchset is to support QinQ offloading, as TC flower supports QinQ.
> 
> v3:
> fix checkpatch warning in patch 4.
> v2:
> fix compile issue in patch 3.
> 
> Jianbo Liu (4):
>   tc: Add VLAN tpid for push action
>   netdev-tc-offloads: Add support to match on 802.1AD ethertype
>   flow: Refactor some of VLAN helper functions
>   Add support to offload QinQ double VLAN headers match

Thanks Jianbo,

this looks fine to me but I think I should wait until the soft-freeze
of the OVS tree to finish before applying this series.

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] YOUR PAYMENT APPROVAL.

2018-07-24 Thread Stuart Symington
YOUR PAYMENT APPROVAL.

Dear Beneficiary,

PLEASE BEAR WITH US FOR MAKING THIS INITIAL CONTACT THROUGH E-MAIL, IT IS DUE 
TO THE URGENCY OF THIS NOTIFICATION,
SINCE WE ARE UNABLE TO REACH YOUR PHONE AND TO OFFICIALLY BRING TO YOUR NOTICE 
ABOUT YOUR COMPENSATION PAYMENT.

THIS IS AMBASSADOR STUART SYMINGTON, USA AMBASSADOR TO NIGERIA. I SHALL BE 
COMING TO YOUR COUNTRY FOR AN OFFICIAL MEETING ON FRIDAY AND I WILL BE BRINGING 
YOUR FUNDS THROUGH AN ATM MASTER CARD OF( $10 Million United State Dollars) 
ALONG WITH ME BUT THIS TIME I WILL NOT GO THROUGH CUSTOMS BECAUSE AS AN 
AMBASSADOR TO NIGERIA, I AM A US GOVERNMENT AGENT AND I HAVE THE VETO POWER TO 
GO THROUGH CUSTOMS. AS SOON AS I AM THROUGH WITH THE MEETING I SHALL THEN 
PROCEED TO YOUR ADDRESS. (SEND YOUR CELL PHONE NUMBER AND THE ADDRESS WHERE YOU 
WANT ME TO BRING THE PACKAGE).
YOU HAVE REALLY PAID SO MUCH IN THIS DELIVERY THAT MAKES ME WONDER. YOU ARE A 
VERY LUCKY PERSON BECAUSE I SHALL BE BRINGING IT MYSELF AND THERE IS NOTHING 
ANYONE CAN DO ABOUT IT.

YOUR PACKAGE( ATM MASTER CARD) MUST BE REGISTERED AS AN AMBASSADORIAL PACKAGE 
FOR ME TO DEFEAT ALL ODDS AND THE COST OF REGISTERING IT IS $150 THE FEE MUST 
BE PAID IN THE NEXT 50 HOURS VIA WESTERN UNION SO THAT ALL NECESSARY 
ARRANGEMENT CAN BE MADE BEFORE TIME WILL BE AGAINST US.

YOU SHOULD SEND THE FEE DIRECTLY TO THE CARGO REGISTRATION OFFICER WITH THE 
INFO BELOW-

Receiver: 
Location:  Lagos,  Nigeria
Amount:  $150 Only(Registration Fee)
No Text Question and Answer:
..Please forward the following 
information(s) to me after payment.
Sender's Name:..
Sender's Address:

AS SOON AS YOU SEND THE FEE MAKE SURE YOU SEND ME THE PAYMENT INFORMATION. MY 
FLIGHT IS TUESDAY AND I EXPECT YOU TO COMPLY BEFORE THEN SO THAT THE DELIVERY 
CAN BE COMPLETED. IF YOU DO NOT COMPLY, THEN IT WILL NOT BE MY FAULT IF YOU DO 
NOT RECEIVE YOUR PACKAGE.

SIGN
Ambassador Stuart Symington
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Improved Packet Drop Statistics in OVS.

2018-07-24 Thread Federico Iezzi
Hi all,

It seems Rohith Basavaraja email has been disabled.
I believe it's too late for someone else to pick up Rohith's work, right?

Federico

On Mon, 11 Jun 2018 at 10:33, Stokes, Ian  wrote:

> > I'm very sorry for double posting.
> > I believe this patch would represent a major feature in terms of OVS's
> > usability.
> > Has anyone got any time for reviewing it?
> >
>
> I'm planning to look at it this week.
>
> Thanks
> Ian
>
> > Thanks,
> > Federico
> >
> > On 6 June 2018 at 17:37, Jan Scheurich 
> wrote:
> >
> > > The user-space part for packet drop stats should be generic and work
> > > with any dpif datapath.
> > > So, if someone implemented the equivalent drop stats functionality in
> > > the kernel datapath that would be very welcome.
> > > We in Ericsson cannot do that currently due to license restrictions.
> > >
> > > Regards, Jan
> > >
> > > > -Original Message-
> > > > From: ovs-dev-boun...@openvswitch.org
> > > > 
> > > On Behalf Of Rohith Basavaraja
> > > > Sent: Friday, 25 May, 2018 07:37
> > > > To: Ben Pfaff 
> > > > Cc: d...@openvswitch.org
> > > > Subject: Re: [ovs-dev] [PATCH] Improved Packet Drop Statistics in
> OVS.
> > > >
> > > > Thanks Ben for the clarification. Yes this new stuff is used only in
> > > > the DPDK datapath and it’s not used in the kernel datapath.
> > > >
> > > > Thanks
> > > > Rohith
> > > >
> > > > On 25/05/18, 2:52 AM, "Ben Pfaff"  wrote:
> > > >
> > > > On Thu, May 24, 2018 at 02:19:06AM +, Rohith Basavaraja
> wrote:
> > > > > Only  changes in
> > > > > datapath/linux/compat/include/linux/openvswitch.h
> > > > > are related to OvS Kernel module.
> > > >
> > > > On a second look, I see that the new stuff here is only for the
> > DPDK
> > > > datapath.  If you don't intend to add this feature to the kernel
> > > > datapath, there should be no problem.  Never mind.
> > > >
> > > >
> > > > ___
> > > > dev mailing list
> > > > d...@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > ___
> > > dev mailing list
> > > d...@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v5 08/14] dp-packet: Handle multi-seg mbufs in resize__().

2018-07-24 Thread Lam, Tiago
On 23/07/2018 23:48, Darrell Ball wrote:
> 
> 
> On Fri, Jul 20, 2018 at 10:10 AM, Lam, Tiago  > wrote:
> 
> On 19/07/2018 00:02, Darrell Ball wrote:
> > 
> > 
> > On Tue, Jul 17, 2018 at 12:59 AM, Lam, Tiago  
> > >> wrote:
> > 
> >     On 16/07/2018 09:37, Lam, Tiago wrote:
> >     > On 13/07/2018 18:54, Darrell Ball wrote:
> >     >> Thanks for the patch.
> >     >>
> >     >> A few queries inline.
> >     >>
> >     >
> >     > Hi Darrell,
> >     >
> >     > Thanks for your inputs. I've replied in-line as well.
> >     >
> >     >> On Wed, Jul 11, 2018 at 11:23 AM, Tiago Lam  
> >     >
> >     >> 
>  >     >>
> >     >>     When enabled with DPDK OvS relies on mbufs allocated by
> >     mempools to
> >     >>     receive and output data on DPDK ports. Until now, each OvS
> >     dp_packet has
> >     >>     had only one mbuf associated, which is allocated with
> the maximum
> >     >>     possible size, taking the MTU into account. This approach,
> >     however,
> >     >>     doesn't allow us to increase the allocated size in an mbuf,
> >     if needed,
> >     >>     since an mbuf is allocated and initialised upon mempool
> >     creation. Thus,
> >     >>     in the current implementation this is dealt with by calling
> >     >>     OVS_NOT_REACHED() and terminating OvS.
> >     >>
> >     >>     To avoid this, and allow the (already) allocated space
> to be
> >     better
> >     >>     used, dp_packet_resize__() now tries to use the available
> >     room, both the
> >     >>     tailroom and the headroom, to make enough space for the new
> >     data. Since
> >     >>     this happens for packets of source DPBUF_DPDK, the
> >     single-segment mbuf
> >     >>     case mentioned above is also covered by this new aproach in
> >     resize__().
> >     >>
> >     >>     Signed-off-by: Tiago Lam  
> >     >
> >     >>         >     >>     Acked-by: Eelco Chaudron  
> >     >
> >     >>         >     >>     ---
> >     >>      lib/dp-packet.c | 48
> >     ++--
> >     >>      1 file changed, 46 insertions(+), 2 deletions(-)
> >     >>
> >     >>     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> >     >>     index d6e19eb..87af459 100644
> >     >>     --- a/lib/dp-packet.c
> >     >>     +++ b/lib/dp-packet.c
> >     >>     @@ -237,9 +237,51 @@ dp_packet_resize__(struct
> dp_packet *b,
> >     size_t
> >     >>     new_headroom, size_t new_tailroom
> >     >>          new_allocated = new_headroom + dp_packet_size(b) +
> >     new_tailroom;
> >     >>
> >     >>          switch (b->source) {
> >     >>     +    /* When resizing mbufs, both a single mbuf and
> multi-segment
> >     >>     mbufs (where
> >     >>     +     * data is not contigously held in memory), both the
> >     headroom
> >     >>     and the
> >     >>     +     * tailroom available will be used to make more space
> >     for where
> >     >>     data needs
> >     >>     +     * to be inserted. I.e if there's not enough headroom,
> >     data may
> >     >>     be shifted
> >     >>     +     * right if there's enough tailroom.
> >     >>     +     * However, this is not bulletproof and in some cases
> >     the space
> >     >>     available
> >     >>     +     * won't be enough - in those cases, an error
> should be
> >     >>     returned and the
> >     >>     +     * packet dropped. */
> >     >>          case DPBUF_DPDK:
> >     >>     -        OVS_NOT_REACHED();
> >     >>
> >     >>
> >     >> Previously, it was a coding error to call this function for a
> >     DPDK mbuf
> >     >> case, which is pretty
> >     >> clear. But with this patch, presumably that is not longer the
> >     case and
> >     >> the calling the API is
> >     >> now ok for DPDK mbufs.
> >     >>
> >     >
> >     > As it stands, it will still be an 

Re: [ovs-dev] [PATCH v5 08/14] dp-packet: Handle multi-seg mbufs in resize__().

2018-07-24 Thread Lam, Tiago
On 23/07/2018 23:55, Darrell Ball wrote:
> 
> 
> On Fri, Jul 20, 2018 at 10:09 AM, Lam, Tiago  > wrote:
> 
> On 18/07/2018 23:53, Darrell Ball wrote:
> > sorry, several distractions delayed response.
> > 
> > On Mon, Jul 16, 2018 at 1:37 AM, Lam, Tiago  
> > >> wrote:
> > 
> >     On 13/07/2018 18:54, Darrell Ball wrote:
> >     > Thanks for the patch.
> >     > 
> >     > A few queries inline.
> >     > 
> > 
> >     Hi Darrell,
> > 
> >     Thanks for your inputs. I've replied in-line as well.
> > 
> >     > On Wed, Jul 11, 2018 at 11:23 AM, Tiago Lam  
> >
> >     > 
>  >     > 
> >     >     When enabled with DPDK OvS relies on mbufs allocated by 
> mempools to
> >     >     receive and output data on DPDK ports. Until now, each OvS 
> dp_packet has
> >     >     had only one mbuf associated, which is allocated with the 
> maximum
> >     >     possible size, taking the MTU into account. This approach, 
> however,
> >     >     doesn't allow us to increase the allocated size in an mbuf, 
> if needed,
> >     >     since an mbuf is allocated and initialised upon mempool 
> creation. Thus,
> >     >     in the current implementation this is dealt with by calling
> >     >     OVS_NOT_REACHED() and terminating OvS.
> >     > 
> >     >     To avoid this, and allow the (already) allocated space to be 
> better
> >     >     used, dp_packet_resize__() now tries to use the available 
> room, both the
> >     >     tailroom and the headroom, to make enough space for the new 
> data. Since
> >     >     this happens for packets of source DPBUF_DPDK, the 
> single-segment mbuf
> >     >     case mentioned above is also covered by this new aproach in 
> resize__().
> >     > 
> >     >     Signed-off-by: Tiago Lam  
> >
> >     >     
>  >     >     Acked-by: Eelco Chaudron  
> >     >
> >     >     
>  >     >     ---
> >     >      lib/dp-packet.c | 48
> >     ++--
> >     >      1 file changed, 46 insertions(+), 2 deletions(-)
> >     >
> >     >     diff --git a/lib/dp-packet.c b/lib/dp-packet.c
> >     >     index d6e19eb..87af459 100644
> >     >     --- a/lib/dp-packet.c
> >     >     +++ b/lib/dp-packet.c
> >     >     @@ -237,9 +237,51 @@ dp_packet_resize__(struct dp_packet *b,
> >     size_t
> >     >     new_headroom, size_t new_tailroom
> >     >          new_allocated = new_headroom + dp_packet_size(b) +
> >     new_tailroom;
> >     >
> >     >          switch (b->source) {
> >     >     +    /* When resizing mbufs, both a single mbuf and
> multi-segment
> >     >     mbufs (where
> >     >     +     * data is not contigously held in memory), both
> the headroom
> >     >     and the
> >     >     +     * tailroom available will be used to make more
> space for
> >     where
> >     >     data needs
> >     >     +     * to be inserted. I.e if there's not enough headroom,
> >     data may
> >     >     be shifted
> >     >     +     * right if there's enough tailroom.
> >     >     +     * However, this is not bulletproof and in some
> cases the
> >     space
> >     >     available
> >     >     +     * won't be enough - in those cases, an error should be
> >     >     returned and the
> >     >     +     * packet dropped. */
> >     >          case DPBUF_DPDK:
> >     >     -        OVS_NOT_REACHED();
> >     >
> >     >
> >     > Previously, it was a coding error to call this function for
> a DPDK
> >     mbuf
> >     > case, which is pretty
> >     > clear. But with this patch, presumably that is not longer
> the case and
> >     > the calling the API is
> >     > now ok for DPDK mbufs.
> >     >
> >
> >     As it stands, it will still be an error to call
> dp_packet_resize__() for
> >     any DPDK packet, or by extension any of the other functions
> that call
> >     it, such as dp_packet_prealloc_tailroom() and
> >     

  1   2   >