[ovs-dev] [PATCH V7 04/18] netdev-offload-dpdk: Improve HW offload flow debuggability

2020-01-08 Thread Eli Britstein
Add debug prints when creating and destroying rte flows, with all the
flow details (attributes, patterns, actions).

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-offload-dpdk.c | 209 ++
 1 file changed, 138 insertions(+), 71 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index a5bc4ad33..1a533ec26 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -28,6 +28,7 @@
 #include "uuid.h"
 
 VLOG_DEFINE_THIS_MODULE(netdev_offload_dpdk);
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(100, 5);
 
 /* Thread-safety
  * =
@@ -132,72 +133,70 @@ struct flow_actions {
 };
 
 static void
-dump_flow_pattern(struct rte_flow_item *item)
+dump_flow_attr(struct ds *s, const struct rte_flow_attr *attr)
 {
-struct ds s;
-
-if (!VLOG_IS_DBG_ENABLED() || item->type == RTE_FLOW_ITEM_TYPE_END) {
-return;
-}
-
-ds_init();
+ds_put_format(s,
+  "  Attributes: "
+  "ingress=%d, egress=%d, prio=%d, group=%d, transfer=%d\n",
+  attr->ingress, attr->egress, attr->priority, attr->group,
+  attr->transfer);
+}
 
+static void
+dump_flow_pattern(struct ds *s, const struct rte_flow_item *item)
+{
 if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
 const struct rte_flow_item_eth *eth_spec = item->spec;
 const struct rte_flow_item_eth *eth_mask = item->mask;
 
-ds_put_cstr(, "rte flow eth pattern:\n");
+ds_put_cstr(s, "rte flow eth pattern:\n");
 if (eth_spec) {
-ds_put_format(,
+ds_put_format(s,
   "  Spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
   "type=0x%04" PRIx16"\n",
   ETH_ADDR_BYTES_ARGS(eth_spec->src.addr_bytes),
   ETH_ADDR_BYTES_ARGS(eth_spec->dst.addr_bytes),
   ntohs(eth_spec->type));
 } else {
-ds_put_cstr(, "  Spec = null\n");
+ds_put_cstr(s, "  Spec = null\n");
 }
 if (eth_mask) {
-ds_put_format(,
+ds_put_format(s,
   "  Mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
   "type=0x%04"PRIx16"\n",
   ETH_ADDR_BYTES_ARGS(eth_mask->src.addr_bytes),
   ETH_ADDR_BYTES_ARGS(eth_mask->dst.addr_bytes),
   ntohs(eth_mask->type));
 } else {
-ds_put_cstr(, "  Mask = null\n");
+ds_put_cstr(s, "  Mask = null\n");
 }
-}
-
-if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+} else if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
 const struct rte_flow_item_vlan *vlan_spec = item->spec;
 const struct rte_flow_item_vlan *vlan_mask = item->mask;
 
-ds_put_cstr(, "rte flow vlan pattern:\n");
+ds_put_cstr(s, "rte flow vlan pattern:\n");
 if (vlan_spec) {
-ds_put_format(,
+ds_put_format(s,
   "  Spec: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
   ntohs(vlan_spec->inner_type), ntohs(vlan_spec->tci));
 } else {
-ds_put_cstr(, "  Spec = null\n");
+ds_put_cstr(s, "  Spec = null\n");
 }
 
 if (vlan_mask) {
-ds_put_format(,
+ds_put_format(s,
   "  Mask: inner_type=0x%"PRIx16", tci=0x%"PRIx16"\n",
   ntohs(vlan_mask->inner_type), ntohs(vlan_mask->tci));
 } else {
-ds_put_cstr(, "  Mask = null\n");
+ds_put_cstr(s, "  Mask = null\n");
 }
-}
-
-if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
+} else if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
 const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
 const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
 
-ds_put_cstr(, "rte flow ipv4 pattern:\n");
+ds_put_cstr(s, "rte flow ipv4 pattern:\n");
 if (ipv4_spec) {
-ds_put_format(,
+ds_put_format(s,
   "  Spec: tos=0x%"PRIx8", ttl=%"PRIx8
   ", proto=0x%"PRIx8
   ", src="IP_FMT", dst="IP_FMT"\n",
@@ -207,10 +206,10 @@ dump_flow_pattern(struct rte_flow_item *item)
   IP_ARGS(ipv4_spec->hdr.src_addr),
   IP_ARGS(ipv4_spec->hdr.dst_addr));
 } else {
-ds_put_cstr(, "  Spec = null\n");
+ds_put_cstr(s, "  Spec = null\n");
 }
 if (ipv4_mask) {
-ds_put_format(,
+ds_put_format(s,
   "  Mask: tos=0x%"PRIx8", ttl=%"PRIx8
   ", proto=0x%"PRIx8
   ", src="IP_FMT", dst="IP_FMT"\n",
@@ -220,89 +219,81 @@ dump_flow_pattern(struct 

[ovs-dev] [PATCH V7 11/18] dpif-netdev: Populate dpif class field in offload struct

2020-01-08 Thread Eli Britstein
Populate dpif class field in offload struct to be used in offloading
flow put.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/dpif-netdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 7ec217f39..80b3fdef3 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2398,6 +2398,7 @@ static int
 dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
 {
 struct dp_netdev_pmd_thread *pmd = offload->pmd;
+const struct dpif_class *dpif_class = pmd->dp->class;
 struct dp_netdev_flow *flow = offload->flow;
 odp_port_t in_port = flow->flow.in_port.odp_port;
 bool modification = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_MOD;
@@ -2435,6 +2436,7 @@ dp_netdev_flow_offload_put(struct dp_flow_offload_item 
*offload)
 }
 }
 info.flow_mark = mark;
+info.dpif_class = dpif_class;
 
 port = netdev_ports_get(in_port, pmd->dp->class);
 if (!port || netdev_vport_is_vport_class(port->netdev_class)) {
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 09/18] dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"

2020-01-08 Thread Eli Britstein
Flows that are offloaded via DPDK can be partially offloaded (matches
only) or fully offloaded (matches and actions). Set partially offloaded
display to (offloaded=partial, dp:ovs), and fully offloaded to
(offloaded=yes, dp:dpdk). Also support filter types "dpdk" and
"partially-offloaded".

Signed-off-by: Eli Britstein 
---
 NEWS  |  3 +++
 lib/dpctl.c   | 28 +---
 lib/dpctl.man |  2 ++
 3 files changed, 30 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index 965facaf8..8e89f77f6 100644
--- a/NEWS
+++ b/NEWS
@@ -34,6 +34,9 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
+   - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
+ partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
+ type filter supports new filters: "dpdk" and "partially-offloaded".
 
 v2.12.0 - 03 Sep 2019
 -
diff --git a/lib/dpctl.c b/lib/dpctl.c
index a1ea25b47..23c2682d0 100644
--- a/lib/dpctl.c
+++ b/lib/dpctl.c
@@ -818,7 +818,11 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow *f, 
struct hmap *ports,
 
 dpif_flow_stats_format(>stats, ds);
 if (dpctl_p->verbosity && f->attrs.offloaded) {
-ds_put_cstr(ds, ", offloaded:yes");
+if (f->attrs.dp_layer && !strcmp(f->attrs.dp_layer, "ovs")) {
+ds_put_cstr(ds, ", offloaded:partial");
+} else {
+ds_put_cstr(ds, ", offloaded:yes");
+}
 }
 if (dpctl_p->verbosity && f->attrs.dp_layer) {
 ds_put_format(ds, ", dp:%s", f->attrs.dp_layer);
@@ -830,8 +834,10 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow *f, 
struct hmap *ports,
 struct dump_types {
 bool ovs;
 bool tc;
+bool dpdk;
 bool offloaded;
 bool non_offloaded;
+bool partially_offloaded;
 };
 
 static void
@@ -839,8 +845,10 @@ enable_all_dump_types(struct dump_types *dump_types)
 {
 dump_types->ovs = true;
 dump_types->tc = true;
+dump_types->dpdk = true;
 dump_types->offloaded = true;
 dump_types->non_offloaded = true;
+dump_types->partially_offloaded = true;
 }
 
 static int
@@ -865,10 +873,14 @@ populate_dump_types(char *types_list, struct dump_types 
*dump_types,
 dump_types->ovs = true;
 } else if (!strcmp(current_type, "tc")) {
 dump_types->tc = true;
+} else if (!strcmp(current_type, "dpdk")) {
+dump_types->dpdk = true;
 } else if (!strcmp(current_type, "offloaded")) {
 dump_types->offloaded = true;
 } else if (!strcmp(current_type, "non-offloaded")) {
 dump_types->non_offloaded = true;
+} else if (!strcmp(current_type, "partially-offloaded")) {
+dump_types->partially_offloaded = true;
 } else if (!strcmp(current_type, "all")) {
 enable_all_dump_types(dump_types);
 } else {
@@ -886,7 +898,9 @@ determine_dpif_flow_dump_types(struct dump_types 
*dump_types,
 {
 dpif_dump_types->ovs_flows = dump_types->ovs || dump_types->non_offloaded;
 dpif_dump_types->netdev_flows = dump_types->tc || dump_types->offloaded
-|| dump_types->non_offloaded;
+|| dump_types->non_offloaded
+|| dump_types->dpdk
+|| dump_types->partially_offloaded;
 }
 
 static bool
@@ -899,7 +913,15 @@ flow_passes_type_filter(const struct dpif_flow *f,
 if (dump_types->tc && !strcmp(f->attrs.dp_layer, "tc")) {
 return true;
 }
-if (dump_types->offloaded && f->attrs.offloaded) {
+if (dump_types->dpdk && !strcmp(f->attrs.dp_layer, "dpdk")) {
+return true;
+}
+if (dump_types->offloaded && f->attrs.offloaded &&
+strcmp(f->attrs.dp_layer, "ovs")) {
+return true;
+}
+if (dump_types->partially_offloaded && f->attrs.offloaded &&
+!strcmp(f->attrs.dp_layer, "ovs")) {
 return true;
 }
 if (dump_types->non_offloaded && !(f->attrs.offloaded)) {
diff --git a/lib/dpctl.man b/lib/dpctl.man
index 090c3b960..727d1f7be 100644
--- a/lib/dpctl.man
+++ b/lib/dpctl.man
@@ -124,8 +124,10 @@ This option supported only for \fBovs\-appctl 
dpctl/dump\-flows\fR.
 .
\fBovs\fR - displays flows handled in the ovs dp
\fBtc\fR - displays flows handled in the tc dp
+   \fBdpdk\fR - displays flows fully offloaded by dpdk
\fBoffloaded\fR - displays flows offloaded to the HW
\fBnon-offloaded\fR - displays flows not offloaded to the HW
+   \fBpartially-offloaded\fR - displays flows where only part of their 
proccessing is done in HW
\fBall\fR - displays all the types of flows
 .IP
 By default all the types of flows are displayed.
-- 
2.14.5

___
dev mailing 

[ovs-dev] [PATCH V7 16/18] netdev-offload-dpdk: Support offload of set MAC actions

2020-01-08 Thread Eli Britstein
Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 Documentation/howto/dpdk.rst |  1 +
 NEWS |  3 +-
 lib/netdev-offload-dpdk.c| 99 
 3 files changed, 102 insertions(+), 1 deletion(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index d9de7bedd..0e9939ddd 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -392,6 +392,7 @@ Supported actions for hardware offload are:
 
 - Output.
 - Drop.
+- Modification of Ethernet (mod_dl_src/mod_dl_dst).
 
 Further Reading
 ---
diff --git a/NEWS b/NEWS
index ffa6a91be..a34990388 100644
--- a/NEWS
+++ b/NEWS
@@ -34,7 +34,8 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
- * Add hardware offload support for output and drop actions (experimental).
+ * Add hardware offload support for output, drop and set MAC actions
+   (experimental).
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
  partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
  type filter supports new filters: "dpdk" and "partially-offloaded".
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 026e77756..afa8f11e4 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -369,6 +369,21 @@ dump_flow_action(struct ds *s, const struct 
rte_flow_action *actions)
 }
 } else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
 ds_put_cstr(s, "rte flow drop action\n");
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_SRC ||
+   actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST) {
+const struct rte_flow_action_set_mac *set_mac = actions->conf;
+
+char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_MAC_DST
+   ? "dst" : "src";
+
+ds_put_format(s, "rte flow set-mac-%s action:\n", dirstr);
+if (set_mac) {
+ds_put_format(s,
+  "  Set-mac-%s: "ETH_ADDR_FMT"\n", dirstr,
+  ETH_ADDR_BYTES_ARGS(set_mac->mac_addr));
+} else {
+ds_put_format(s, "  Set-mac-%s = null\n", dirstr);
+}
 } else {
 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
 }
@@ -797,6 +812,80 @@ add_output_action(struct netdev *netdev,
 return ret;
 }
 
+static int
+add_set_flow_action__(struct flow_actions *actions,
+  const void *value, void *mask,
+  const size_t size, const int attr)
+{
+void *spec;
+
+if (mask) {
+/* DPDK does not support partially masked set actions. In such
+ * case, fail the offload.
+ */
+if (is_all_zeros(mask, size)) {
+return 0;
+}
+if (!is_all_ones(mask, size)) {
+VLOG_DBG_RL(, "Partial mask is not supported");
+return -1;
+}
+}
+
+spec = xzalloc(size);
+memcpy(spec, value, size);
+add_flow_action(actions, attr, spec);
+
+/* Clear used mask for later checking. */
+if (mask) {
+memset(mask, 0, size);
+}
+return 0;
+}
+
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
+  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
+  MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
+
+static int
+parse_set_actions(struct flow_actions *actions,
+  const struct nlattr *set_actions,
+  const size_t set_actions_len,
+  bool masked)
+{
+const struct nlattr *sa;
+unsigned int sleft;
+
+#define add_set_flow_action(field, type)  \
+if (add_set_flow_action__(actions, >field,   \
+  mask ? CONST_CAST(void *, >field) : NULL, \
+  sizeof key->field, type)) { \
+return -1;\
+}
+
+NL_ATTR_FOR_EACH_UNSAFE (sa, sleft, set_actions, set_actions_len) {
+if (nl_attr_type(sa) == OVS_KEY_ATTR_ETHERNET) {
+const struct ovs_key_ethernet *key = nl_attr_get(sa);
+const struct ovs_key_ethernet *mask = masked ? key + 1 : NULL;
+
+add_set_flow_action(eth_src, RTE_FLOW_ACTION_TYPE_SET_MAC_SRC);
+add_set_flow_action(eth_dst, RTE_FLOW_ACTION_TYPE_SET_MAC_DST);
+
+if (mask && !is_all_zeros(mask, sizeof *mask)) {
+VLOG_DBG_RL(, "Unsupported ETHERNET set action");
+return -1;
+}
+} else {
+VLOG_DBG_RL(,
+"Unsupported set action type %d", 

[ovs-dev] [PATCH V7 12/18] netdev-dpdk: Getter function for dpdk port id API

2020-01-08 Thread Eli Britstein
Add a getter function for using the dpdk port id outside the scope of
netdev-dpdk.c to be used for HW offload.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-dpdk.c | 18 ++
 lib/netdev-dpdk.h |  2 ++
 2 files changed, 20 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 4ff5a70a8..db25c292e 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4490,6 +4490,24 @@ unlock:
 return err;
 }
 
+int
+netdev_dpdk_get_port_id(struct netdev *netdev)
+{
+struct netdev_dpdk *dev;
+int ret = -1;
+
+if (!is_dpdk_class(netdev->netdev_class)) {
+goto out;
+}
+
+dev = netdev_dpdk_cast(netdev);
+ovs_mutex_lock(>mutex);
+ret = dev->port_id;
+ovs_mutex_unlock(>mutex);
+out:
+return ret;
+}
+
 bool
 netdev_dpdk_flow_api_supported(struct netdev *netdev)
 {
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 59919a89a..848346cb4 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -53,6 +53,8 @@ netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
  struct rte_flow *rte_flow,
  struct rte_flow_query_count *query,
  struct rte_flow_error *error);
+int
+netdev_dpdk_get_port_id(struct netdev *netdev);
 
 #else
 
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 13/18] netdev-offload: Introduce a function to validate same flow api handle

2020-01-08 Thread Eli Britstein
Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-provider.h |  1 +
 lib/netdev-offload.c  | 12 
 2 files changed, 13 insertions(+)

diff --git a/lib/netdev-offload-provider.h b/lib/netdev-offload-provider.h
index 4e1c4251d..5a809c0cd 100644
--- a/lib/netdev-offload-provider.h
+++ b/lib/netdev-offload-provider.h
@@ -89,6 +89,7 @@ struct netdev_flow_api {
 
 int netdev_register_flow_api_provider(const struct netdev_flow_api *);
 int netdev_unregister_flow_api_provider(const char *type);
+bool netdev_flow_api_equals(const struct netdev *, const struct netdev *);
 
 #ifdef __linux__
 extern const struct netdev_flow_api netdev_offload_tc;
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index ae01acda6..32eab5910 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -156,6 +156,18 @@ netdev_unregister_flow_api_provider(const char *type)
 return error;
 }
 
+bool
+netdev_flow_api_equals(const struct netdev *netdev1,
+   const struct netdev *netdev2)
+{
+const struct netdev_flow_api *netdev_flow_api1 =
+ovsrcu_get(const struct netdev_flow_api *, >flow_api);
+const struct netdev_flow_api *netdev_flow_api2 =
+ovsrcu_get(const struct netdev_flow_api *, >flow_api);
+
+return netdev_flow_api1 == netdev_flow_api2;
+}
+
 static int
 netdev_assign_flow_api(struct netdev *netdev)
 {
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 07/18] netdev-offload-dpdk: Framework for actions offload

2020-01-08 Thread Eli Britstein
Currently HW offload is accelerating only the rule matching sequence.
Introduce a framework for offloading rule actions as a pre-step for
processing the rule actions in HW. In case of a failure, fallback to the
legacy partial offload scheme.

Note: a flow will be fully offloaded only if it can process all its
actions in HW.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-offload-dpdk.c | 104 +++---
 1 file changed, 88 insertions(+), 16 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 5345c434c..4fa4df0d9 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -56,6 +56,7 @@ struct ufid_to_rte_flow_data {
 struct cmap_node node;
 ovs_u128 ufid;
 struct rte_flow *rte_flow;
+bool actions_offloaded;
 };
 
 /* Find rte_flow with @ufid. */
@@ -76,7 +77,7 @@ ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
 
 static inline void
 ufid_to_rte_flow_associate(const ovs_u128 *ufid,
-   struct rte_flow *rte_flow)
+   struct rte_flow *rte_flow, bool actions_offloaded)
 {
 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
@@ -95,6 +96,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
 
 data->ufid = *ufid;
 data->rte_flow = rte_flow;
+data->actions_offloaded = actions_offloaded;
 
 cmap_insert(_to_rte_flow,
 CONST_CAST(struct cmap_node *, >node), hash);
@@ -697,24 +699,89 @@ add_flow_mark_rss_actions(struct flow_actions *actions,
 add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
 }
 
-static int
-netdev_offload_dpdk_add_flow(struct netdev *netdev,
- const struct match *match,
- struct nlattr *nl_actions OVS_UNUSED,
- size_t actions_len OVS_UNUSED,
- const ovs_u128 *ufid,
- struct offload_info *info)
+static struct rte_flow *
+netdev_offload_dpdk_mark_rss(struct flow_patterns *patterns,
+ struct netdev *netdev,
+ uint32_t flow_mark)
 {
+struct flow_actions actions = { .actions = NULL, .cnt = 0 };
 const struct rte_flow_attr flow_attr = {
 .group = 0,
 .priority = 0,
 .ingress = 1,
 .egress = 0
 };
-struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
-struct flow_actions actions = { .actions = NULL, .cnt = 0 };
+struct rte_flow_error error;
 struct rte_flow *flow;
+
+add_flow_mark_rss_actions(, flow_mark, netdev);
+
+flow = netdev_offload_dpdk_flow_create(netdev, _attr, patterns->items,
+   actions.actions, );
+
+free_flow_actions();
+return flow;
+}
+
+static int
+parse_flow_actions(struct netdev *netdev OVS_UNUSED,
+   struct flow_actions *actions,
+   struct nlattr *nl_actions,
+   size_t nl_actions_len,
+   struct offload_info *info OVS_UNUSED)
+{
+struct nlattr *nla;
+size_t left;
+
+NL_ATTR_FOR_EACH_UNSAFE (nla, left, nl_actions, nl_actions_len) {
+VLOG_DBG_RL(, "Unsupported action type %d", nl_attr_type(nla));
+return -1;
+}
+
+if (nl_actions_len == 0) {
+VLOG_DBG_RL(, "No actions provided");
+return -1;
+}
+
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
+return 0;
+}
+
+static struct rte_flow *
+netdev_offload_dpdk_actions(struct netdev *netdev,
+struct flow_patterns *patterns,
+struct nlattr *nl_actions,
+size_t actions_len,
+struct offload_info *info)
+{
+const struct rte_flow_attr flow_attr = { .ingress = 1, .transfer = 1 };
+struct flow_actions actions = { .actions = NULL, .cnt = 0 };
+struct rte_flow *flow = NULL;
 struct rte_flow_error error;
+int ret;
+
+ret = parse_flow_actions(netdev, , nl_actions, actions_len, info);
+if (ret) {
+goto out;
+}
+flow = netdev_offload_dpdk_flow_create(netdev, _attr, patterns->items,
+   actions.actions, );
+out:
+free_flow_actions();
+return flow;
+}
+
+static int
+netdev_offload_dpdk_add_flow(struct netdev *netdev,
+ const struct match *match,
+ struct nlattr *nl_actions,
+ size_t actions_len,
+ const ovs_u128 *ufid,
+ struct offload_info *info)
+{
+struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
+bool actions_offloaded = true;
+struct rte_flow *flow;
 int ret = 0;
 
 ret = parse_flow_match(, match);
@@ -722,22 +789,27 @@ 

[ovs-dev] [PATCH V7 08/18] netdev-offload-dpdk: Implement flow get method

2020-01-08 Thread Eli Britstein
Implement the flow get method for DPDK, to get the statistics of the
provided ufid, towards reading statistics of fully offloaded flows.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-offload-dpdk.c | 47 +++
 1 file changed, 47 insertions(+)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 4fa4df0d9..55e266072 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -57,6 +57,7 @@ struct ufid_to_rte_flow_data {
 ovs_u128 ufid;
 struct rte_flow *rte_flow;
 bool actions_offloaded;
+struct dpif_flow_stats stats;
 };
 
 /* Find rte_flow with @ufid. */
@@ -964,9 +965,55 @@ netdev_offload_dpdk_init_flow_api(struct netdev *netdev)
 return netdev_dpdk_flow_api_supported(netdev) ? 0 : EOPNOTSUPP;
 }
 
+static int
+netdev_offload_dpdk_flow_get(struct netdev *netdev,
+ struct match *match OVS_UNUSED,
+ struct nlattr **actions OVS_UNUSED,
+ const ovs_u128 *ufid,
+ struct dpif_flow_stats *stats,
+ struct dpif_flow_attrs *attrs,
+ struct ofpbuf *buf OVS_UNUSED)
+{
+struct rte_flow_query_count query = { .reset = 1 };
+struct ufid_to_rte_flow_data *rte_flow_data;
+struct rte_flow_error error;
+int ret = 0;
+
+rte_flow_data = ufid_to_rte_flow_data_find(ufid);
+if (!rte_flow_data || !rte_flow_data->rte_flow) {
+ret = -1;
+goto out;
+}
+
+attrs->offloaded = true;
+if (!rte_flow_data->actions_offloaded) {
+attrs->dp_layer = "ovs";
+memset(stats, 0, sizeof *stats);
+goto out;
+}
+attrs->dp_layer = "dpdk";
+ret = netdev_dpdk_rte_flow_query_count(netdev, rte_flow_data->rte_flow,
+   , );
+if (ret) {
+VLOG_DBG_RL(, "%s: Failed to query ufid "UUID_FMT" flow: %p\n",
+netdev_get_name(netdev), UUID_ARGS((struct uuid *)ufid),
+rte_flow_data->rte_flow);
+goto out;
+}
+rte_flow_data->stats.n_packets += (query.hits_set) ? query.hits : 0;
+rte_flow_data->stats.n_bytes += (query.bytes_set) ? query.bytes : 0;
+if (query.hits_set && query.hits) {
+rte_flow_data->stats.used = time_msec();
+}
+memcpy(stats, _flow_data->stats, sizeof *stats);
+out:
+return ret;
+}
+
 const struct netdev_flow_api netdev_offload_dpdk = {
 .type = "dpdk_flow_api",
 .flow_put = netdev_offload_dpdk_flow_put,
 .flow_del = netdev_offload_dpdk_flow_del,
 .init_flow_api = netdev_offload_dpdk_init_flow_api,
+.flow_get = netdev_offload_dpdk_flow_get,
 };
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 18/18] netdev-offload-dpdk: Support offload of set TCP/UDP ports actions

2020-01-08 Thread Eli Britstein
Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 Documentation/howto/dpdk.rst |  1 +
 NEWS |  4 ++--
 lib/netdev-offload-dpdk.c| 43 +++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index c440bf28e..be950d7ce 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -394,6 +394,7 @@ Supported actions for hardware offload are:
 - Drop.
 - Modification of Ethernet (mod_dl_src/mod_dl_dst).
 - Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
+- Modification of TCP/UDP (mod_tp_src/mod_tp_dst).
 
 Further Reading
 ---
diff --git a/NEWS b/NEWS
index 00aa10cae..e8d662a0c 100644
--- a/NEWS
+++ b/NEWS
@@ -34,8 +34,8 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
- * Add hardware offload support for output, drop and set actions of
-   MAC and IPv4 (experimental).
+ * Add hardware offload support for output, drop, set of MAC, IPv4 and
+   TCP/UDP ports actions (experimental).
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
  partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
  type filter supports new filters: "dpdk" and "partially-offloaded".
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 78aaa0cb2..aa2ae4818 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -407,6 +407,19 @@ dump_flow_action(struct ds *s, const struct 
rte_flow_action *actions)
 } else {
 ds_put_cstr(s, "  Set-ttl = null\n");
 }
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_SRC ||
+   actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST) {
+const struct rte_flow_action_set_tp *set_tp = actions->conf;
+char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_TP_DST
+   ? "dst" : "src";
+
+ds_put_format(s, "rte flow set-tcp/udp-port-%s action:\n", dirstr);
+if (set_tp) {
+ds_put_format(s, "  Set-%s-tcp/udp-port: %"PRIu16"\n", dirstr,
+  ntohs(set_tp->port));
+} else {
+ds_put_format(s, "  Set-%s-tcp/udp-port = null\n", dirstr);
+}
 } else {
 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
 }
@@ -876,6 +889,14 @@ BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) 
==
   MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
   MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
+  MEMBER_SIZEOF(struct ovs_key_tcp, tcp_src));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
+  MEMBER_SIZEOF(struct ovs_key_tcp, tcp_dst));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
+  MEMBER_SIZEOF(struct ovs_key_udp, udp_src));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_tp) ==
+  MEMBER_SIZEOF(struct ovs_key_udp, udp_dst));
 
 static int
 parse_set_actions(struct flow_actions *actions,
@@ -917,6 +938,28 @@ parse_set_actions(struct flow_actions *actions,
 VLOG_DBG_RL(, "Unsupported IPv4 set action");
 return -1;
 }
+} else if (nl_attr_type(sa) == OVS_KEY_ATTR_TCP) {
+const struct ovs_key_tcp *key = nl_attr_get(sa);
+const struct ovs_key_tcp *mask = masked ? key + 1 : NULL;
+
+add_set_flow_action(tcp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
+add_set_flow_action(tcp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
+
+if (mask && !is_all_zeros(mask, sizeof *mask)) {
+VLOG_DBG_RL(, "Unsupported TCP set action");
+return -1;
+}
+} else if (nl_attr_type(sa) == OVS_KEY_ATTR_UDP) {
+const struct ovs_key_udp *key = nl_attr_get(sa);
+const struct ovs_key_udp *mask = masked ? key + 1 : NULL;
+
+add_set_flow_action(udp_src, RTE_FLOW_ACTION_TYPE_SET_TP_SRC);
+add_set_flow_action(udp_dst, RTE_FLOW_ACTION_TYPE_SET_TP_DST);
+
+if (mask && !is_all_zeros(mask, sizeof *mask)) {
+VLOG_DBG_RL(, "Unsupported UDP set action");
+return -1;
+}
 } else {
 VLOG_DBG_RL(,
 "Unsupported set action type %d", nl_attr_type(sa));
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 00/18] netdev datapath actions offload

2020-01-08 Thread Eli Britstein
Currently, netdev datapath offload only accelerates the flow match
sequence by associating a mark per flow. This series introduces the full
offload of netdev datapath flows by having the HW also perform the flow
actions.

This series adds HW offload for output, drop, set MAC, set IPv4 and set
TCP/UDP ports actions using the DPDK rte_flow API.

v1 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/614511472
v2 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/619262148
v3 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/622253870
v4 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/625654160
v5 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/626692202
v6 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/627132194
v7 Travis passed: https://travis-ci.org/elibritstein/OVS/builds/634603553

v1-v2:
- fallback to mark/rss in case of any failure of action offload.
- dp_layer changed to "in_hw" in case the flow is offloaded.
- using netdev_ports_get instead of dp_netdev_lookup_port in stats reading.
- using flow_dump_next API instead of a new API.
- validity checks for last action of output and clone.
- count action should be done for drop as well.
- validity check for output action.
- added doc in Documentation/howto/dpdk.rst.
v2-v3:
- removed refactoring for netdev-offload-dpdk-flow.c file.
- dropped flush commits.
- dbg prints rate-limited, and outside lock.
- added validity check for output action - same flow_api handle for both 
netdevs.
- added a mutex to protect possible concurrent deletion/querying of a flow.
- statistics are collected using flow_get API.
- added more documentation in Documentation/howto/dpdk.rst.
v3-v4:
- debug prints moved to netdev-offload-dpdk.c.
- flow get returns full stats, not diff.
- removed additional fields from offload_info and dp_netdev_flow.
- read HW stats during dpif_netdev_flow_get as well as
  dpif_netdev_flow_dump_next.
- moved actions offload framework just before support commit.
- fixed memory leak in rewrite code.
- dropped clone commit - will be posted in next series.
v4-v5:
- statistics collecting changed to populate dp_layer and offloaded
  attributes. dp_layer changed to "dpdk" for fully offloaded by dpdk.
- display offloaded:partial for partially offloaded flows.
- support filter types "dpdk" and "partially-offloaded" in
  dpctl/dump-flows type=X.
- simplify code readability of rewrite commits.
v5-v6:
- debug prints improvement for partial offload installations. dbg prints
  for failed actions, warn for other failure.
- fixed memory leak in case retrieved dpdk port-id is invalid.
v6-v7:
- rebase to support explicit drop action.
- fix dpctl/dump-flows issues discovered by make check-offloads.

Eli Britstein (17):
  netdev-offload-dpdk: Refactor flow patterns
  netdev-offload-dpdk: Refactor action items freeing scheme
  netdev-offload-dpdk: Dynamically allocate pattern items
  netdev-offload-dpdk: Improve HW offload flow debuggability
  netdev-dpdk: Introduce rte flow query function
  netdev-offload-dpdk: Return UFID-rte_flow entry in find method
  netdev-offload-dpdk: Framework for actions offload
  netdev-offload-dpdk: Implement flow get method
  dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"
  dpif-netdev: Populate dpif class field in offload struct
  netdev-dpdk: Getter function for dpdk port id API
  netdev-offload: Introduce a function to validate same flow api handle
  netdev-offload-dpdk: Support offload of output action
  netdev-offload-dpdk: Support offload of drop action
  netdev-offload-dpdk: Support offload of set MAC actions
  netdev-offload-dpdk: Support offload of set IPv4 actions
  netdev-offload-dpdk: Support offload of set TCP/UDP ports actions

Ophir Munk (1):
  dpif-netdev: Update offloaded flows statistics

 Documentation/howto/dpdk.rst  |  21 +-
 NEWS  |   5 +
 lib/dpctl.c   |  28 +-
 lib/dpctl.man |   2 +
 lib/dpif-netdev.c |  81 +++-
 lib/netdev-dpdk.c |  48 +++
 lib/netdev-dpdk.h |   8 +
 lib/netdev-offload-dpdk.c | 967 --
 lib/netdev-offload-provider.h |   1 +
 lib/netdev-offload.c  |  12 +
 10 files changed, 927 insertions(+), 246 deletions(-)

-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 17/18] netdev-offload-dpdk: Support offload of set IPv4 actions

2020-01-08 Thread Eli Britstein
Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 Documentation/howto/dpdk.rst |  1 +
 NEWS |  4 ++--
 lib/netdev-offload-dpdk.c| 41 +
 3 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 0e9939ddd..c440bf28e 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -393,6 +393,7 @@ Supported actions for hardware offload are:
 - Output.
 - Drop.
 - Modification of Ethernet (mod_dl_src/mod_dl_dst).
+- Modification of IPv4 (mod_nw_src/mod_nw_dst/mod_nw_ttl).
 
 Further Reading
 ---
diff --git a/NEWS b/NEWS
index a34990388..00aa10cae 100644
--- a/NEWS
+++ b/NEWS
@@ -34,8 +34,8 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
- * Add hardware offload support for output, drop and set MAC actions
-   (experimental).
+ * Add hardware offload support for output, drop and set actions of
+   MAC and IPv4 (experimental).
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
  partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
  type filter supports new filters: "dpdk" and "partially-offloaded".
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index afa8f11e4..78aaa0cb2 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -384,6 +384,29 @@ dump_flow_action(struct ds *s, const struct 
rte_flow_action *actions)
 } else {
 ds_put_format(s, "  Set-mac-%s = null\n", dirstr);
 }
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC ||
+   actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST) {
+const struct rte_flow_action_set_ipv4 *set_ipv4 = actions->conf;
+char *dirstr = actions->type == RTE_FLOW_ACTION_TYPE_SET_IPV4_DST
+   ? "dst" : "src";
+
+ds_put_format(s, "rte flow set-ipv4-%s action:\n", dirstr);
+if (set_ipv4) {
+ds_put_format(s,
+  "  Set-ipv4-%s: "IP_FMT"\n", dirstr,
+  IP_ARGS(set_ipv4->ipv4_addr));
+} else {
+ds_put_format(s, "  Set-ipv4-%s = null\n", dirstr);
+}
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_SET_TTL) {
+const struct rte_flow_action_set_ttl *set_ttl = actions->conf;
+
+ds_put_cstr(s, "rte flow set-ttl action:\n");
+if (set_ttl) {
+ds_put_format(s, "  Set-ttl: %d\n", set_ttl->ttl_value);
+} else {
+ds_put_cstr(s, "  Set-ttl = null\n");
+}
 } else {
 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
 }
@@ -847,6 +870,12 @@ BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
   MEMBER_SIZEOF(struct ovs_key_ethernet, eth_src));
 BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_mac) ==
   MEMBER_SIZEOF(struct ovs_key_ethernet, eth_dst));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
+  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_src));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ipv4) ==
+  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_dst));
+BUILD_ASSERT_DECL(sizeof(struct rte_flow_action_set_ttl) ==
+  MEMBER_SIZEOF(struct ovs_key_ipv4, ipv4_ttl));
 
 static int
 parse_set_actions(struct flow_actions *actions,
@@ -876,6 +905,18 @@ parse_set_actions(struct flow_actions *actions,
 VLOG_DBG_RL(, "Unsupported ETHERNET set action");
 return -1;
 }
+} else if (nl_attr_type(sa) == OVS_KEY_ATTR_IPV4) {
+const struct ovs_key_ipv4 *key = nl_attr_get(sa);
+const struct ovs_key_ipv4 *mask = masked ? key + 1 : NULL;
+
+add_set_flow_action(ipv4_src, RTE_FLOW_ACTION_TYPE_SET_IPV4_SRC);
+add_set_flow_action(ipv4_dst, RTE_FLOW_ACTION_TYPE_SET_IPV4_DST);
+add_set_flow_action(ipv4_ttl, RTE_FLOW_ACTION_TYPE_SET_TTL);
+
+if (mask && !is_all_zeros(mask, sizeof *mask)) {
+VLOG_DBG_RL(, "Unsupported IPv4 set action");
+return -1;
+}
 } else {
 VLOG_DBG_RL(,
 "Unsupported set action type %d", nl_attr_type(sa));
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 01/18] netdev-offload-dpdk: Refactor flow patterns

2020-01-08 Thread Eli Britstein
Refactor the flow patterns code to a helper function for better
readability and towards supporting more matches.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-offload-dpdk.c | 207 +-
 1 file changed, 111 insertions(+), 96 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 96794dc4d..71da12aab 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -410,54 +410,39 @@ add_flow_rss_action(struct flow_actions *actions,
 return rss_data;
 }
 
+struct flow_items {
+struct rte_flow_item_eth  eth;
+struct rte_flow_item_vlan vlan;
+struct rte_flow_item_ipv4 ipv4;
+union {
+struct rte_flow_item_tcp  tcp;
+struct rte_flow_item_udp  udp;
+struct rte_flow_item_sctp sctp;
+struct rte_flow_item_icmp icmp;
+};
+};
+
 static int
-netdev_offload_dpdk_add_flow(struct netdev *netdev,
- const struct match *match,
- struct nlattr *nl_actions OVS_UNUSED,
- size_t actions_len OVS_UNUSED,
- const ovs_u128 *ufid,
- struct offload_info *info)
+parse_flow_match(struct flow_patterns *patterns,
+ struct flow_items *spec,
+ struct flow_items *mask,
+ const struct match *match)
 {
-const struct rte_flow_attr flow_attr = {
-.group = 0,
-.priority = 0,
-.ingress = 1,
-.egress = 0
-};
-struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
-struct flow_actions actions = { .actions = NULL, .cnt = 0 };
-struct rte_flow *flow;
-struct rte_flow_error error;
 uint8_t proto = 0;
-int ret = 0;
-struct flow_items {
-struct rte_flow_item_eth  eth;
-struct rte_flow_item_vlan vlan;
-struct rte_flow_item_ipv4 ipv4;
-union {
-struct rte_flow_item_tcp  tcp;
-struct rte_flow_item_udp  udp;
-struct rte_flow_item_sctp sctp;
-struct rte_flow_item_icmp icmp;
-};
-} spec, mask;
-
-memset(, 0, sizeof spec);
-memset(, 0, sizeof mask);
 
 /* Eth */
 if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
-memcpy(, >flow.dl_dst, sizeof spec.eth.dst);
-memcpy(, >flow.dl_src, sizeof spec.eth.src);
-spec.eth.type = match->flow.dl_type;
+memcpy(>eth.dst, >flow.dl_dst, sizeof spec->eth.dst);
+memcpy(>eth.src, >flow.dl_src, sizeof spec->eth.src);
+spec->eth.type = match->flow.dl_type;
 
-memcpy(, >wc.masks.dl_dst, sizeof mask.eth.dst);
-memcpy(, >wc.masks.dl_src, sizeof mask.eth.src);
-mask.eth.type = match->wc.masks.dl_type;
+memcpy(>eth.dst, >wc.masks.dl_dst, sizeof mask->eth.dst);
+memcpy(>eth.src, >wc.masks.dl_src, sizeof mask->eth.src);
+mask->eth.type = match->wc.masks.dl_type;
 
-add_flow_pattern(, RTE_FLOW_ITEM_TYPE_ETH,
- , );
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH,
+ >eth, >eth);
 } else {
 /*
  * If user specifies a flow (like UDP flow) without L2 patterns,
@@ -466,41 +451,41 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
  * NIC (such as XL710) doesn't support that. Below is a workaround,
  * which simply matches any L2 pkts.
  */
-add_flow_pattern(, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
 }
 
 /* VLAN */
 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
-spec.vlan.tci  = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
-mask.vlan.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
+spec->vlan.tci  = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
+mask->vlan.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
 /* Match any protocols. */
-mask.vlan.inner_type = 0;
+mask->vlan.inner_type = 0;
 
-add_flow_pattern(, RTE_FLOW_ITEM_TYPE_VLAN,
- , );
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN,
+ >vlan, >vlan);
 }
 
 /* IP v4 */
 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
-spec.ipv4.hdr.type_of_service = match->flow.nw_tos;
-spec.ipv4.hdr.time_to_live= match->flow.nw_ttl;
-spec.ipv4.hdr.next_proto_id   = match->flow.nw_proto;
-spec.ipv4.hdr.src_addr= match->flow.nw_src;
-spec.ipv4.hdr.dst_addr= match->flow.nw_dst;
+spec->ipv4.hdr.type_of_service = match->flow.nw_tos;
+spec->ipv4.hdr.time_to_live= match->flow.nw_ttl;
+spec->ipv4.hdr.next_proto_id   = match->flow.nw_proto;
+spec->ipv4.hdr.src_addr= 

[ovs-dev] [PATCH V7 06/18] netdev-offload-dpdk: Return UFID-rte_flow entry in find method

2020-01-08 Thread Eli Britstein
Change the find method to return the whole entry of UFID-rte_flow
association instead of only the rte_flow field in it, as a pre-step
towards adding and using more fields into that map entry.

Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 1a533ec26..5345c434c 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -59,15 +59,15 @@ struct ufid_to_rte_flow_data {
 };
 
 /* Find rte_flow with @ufid. */
-static struct rte_flow *
-ufid_to_rte_flow_find(const ovs_u128 *ufid)
+static struct ufid_to_rte_flow_data *
+ufid_to_rte_flow_data_find(const ovs_u128 *ufid)
 {
 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
 struct ufid_to_rte_flow_data *data;
 
 CMAP_FOR_EACH_WITH_HASH (data, node, hash, _to_rte_flow) {
 if (ovs_u128_equals(*ufid, data->ufid)) {
-return data->rte_flow;
+return data;
 }
 }
 
@@ -80,6 +80,7 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
 {
 size_t hash = hash_bytes(ufid, sizeof *ufid, 0);
 struct ufid_to_rte_flow_data *data = xzalloc(sizeof *data);
+struct ufid_to_rte_flow_data *data_prev;
 
 /*
  * We should not simply overwrite an existing rte flow.
@@ -87,7 +88,10 @@ ufid_to_rte_flow_associate(const ovs_u128 *ufid,
  * Thus, if following assert triggers, something is wrong:
  * the rte_flow is not destroyed.
  */
-ovs_assert(ufid_to_rte_flow_find(ufid) == NULL);
+data_prev = ufid_to_rte_flow_data_find(ufid);
+if (data_prev) {
+ovs_assert(data_prev->rte_flow == NULL);
+}
 
 data->ufid = *ufid;
 data->rte_flow = rte_flow;
@@ -836,16 +840,17 @@ netdev_offload_dpdk_flow_put(struct netdev *netdev, 
struct match *match,
  const ovs_u128 *ufid, struct offload_info *info,
  struct dpif_flow_stats *stats)
 {
-struct rte_flow *rte_flow;
+struct ufid_to_rte_flow_data *rte_flow_data;
 int ret;
 
 /*
  * If an old rte_flow exists, it means it's a flow modification.
  * Here destroy the old rte flow first before adding a new one.
  */
-rte_flow = ufid_to_rte_flow_find(ufid);
-if (rte_flow) {
-ret = netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow);
+rte_flow_data = ufid_to_rte_flow_data_find(ufid);
+if (rte_flow_data && rte_flow_data->rte_flow) {
+ret = netdev_offload_dpdk_destroy_flow(netdev, ufid,
+   rte_flow_data->rte_flow);
 if (ret < 0) {
 return ret;
 }
@@ -867,16 +872,18 @@ static int
 netdev_offload_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
  struct dpif_flow_stats *stats)
 {
-struct rte_flow *rte_flow = ufid_to_rte_flow_find(ufid);
+struct ufid_to_rte_flow_data *rte_flow_data;
 
-if (!rte_flow) {
+rte_flow_data = ufid_to_rte_flow_data_find(ufid);
+if (!rte_flow_data || !rte_flow_data->rte_flow) {
 return -1;
 }
 
 if (stats) {
 memset(stats, 0, sizeof *stats);
 }
-return netdev_offload_dpdk_destroy_flow(netdev, ufid, rte_flow);
+return netdev_offload_dpdk_destroy_flow(netdev, ufid,
+rte_flow_data->rte_flow);
 }
 
 static int
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 02/18] netdev-offload-dpdk: Refactor action items freeing scheme

2020-01-08 Thread Eli Britstein
Action item data structures are pointed by rte_flow_action items.
Refactor the code to free the data structures when freeing the
rte_flow_action items, allowing simpler future actions simpler to add to
the code.

Signed-off-by: Eli Britstein 
---
 lib/netdev-offload-dpdk.c | 92 ++-
 1 file changed, 52 insertions(+), 40 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 71da12aab..89828224a 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -374,40 +374,19 @@ add_flow_action(struct flow_actions *actions, enum 
rte_flow_action_type type,
 actions->cnt++;
 }
 
-struct action_rss_data {
-struct rte_flow_action_rss conf;
-uint16_t queue[0];
-};
-
-static struct action_rss_data *
-add_flow_rss_action(struct flow_actions *actions,
-struct netdev *netdev)
+static void
+free_flow_actions(struct flow_actions *actions)
 {
 int i;
-struct action_rss_data *rss_data;
 
-rss_data = xmalloc(sizeof *rss_data +
-   netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
-*rss_data = (struct action_rss_data) {
-.conf = (struct rte_flow_action_rss) {
-.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
-.level = 0,
-.types = 0,
-.queue_num = netdev_n_rxq(netdev),
-.queue = rss_data->queue,
-.key_len = 0,
-.key  = NULL
-},
-};
-
-/* Override queue array with default. */
-for (i = 0; i < netdev_n_rxq(netdev); i++) {
-   rss_data->queue[i] = i;
+for (i = 0; i < actions->cnt; i++) {
+if (actions->actions[i].conf) {
+free(CONST_CAST(void *, actions->actions[i].conf));
+}
 }
-
-add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, _data->conf);
-
-return rss_data;
+free(actions->actions);
+actions->actions = NULL;
+actions->cnt = 0;
 }
 
 struct flow_items {
@@ -569,6 +548,47 @@ parse_flow_match(struct flow_patterns *patterns,
 return 0;
 }
 
+static void
+add_flow_mark_rss_actions(struct flow_actions *actions,
+  uint32_t flow_mark,
+  const struct netdev *netdev)
+{
+struct rte_flow_action_mark *mark;
+struct action_rss_data {
+struct rte_flow_action_rss conf;
+uint16_t queue[0];
+} *rss_data;
+BUILD_ASSERT_DECL(offsetof(struct action_rss_data, conf) == 0);
+int i;
+
+mark = xzalloc(sizeof *mark);
+
+mark->id = flow_mark;
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_MARK, mark);
+
+rss_data = xmalloc(sizeof *rss_data +
+   netdev_n_rxq(netdev) * sizeof rss_data->queue[0]);
+*rss_data = (struct action_rss_data) {
+.conf = (struct rte_flow_action_rss) {
+.func = RTE_ETH_HASH_FUNCTION_DEFAULT,
+.level = 0,
+.types = 0,
+.queue_num = netdev_n_rxq(netdev),
+.queue = rss_data->queue,
+.key_len = 0,
+.key  = NULL
+},
+};
+
+/* Override queue array with default. */
+for (i = 0; i < netdev_n_rxq(netdev); i++) {
+   rss_data->queue[i] = i;
+}
+
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_RSS, _data->conf);
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_END, NULL);
+}
+
 static int
 netdev_offload_dpdk_add_flow(struct netdev *netdev,
  const struct match *match,
@@ -598,20 +618,12 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
 goto out;
 }
 
-struct rte_flow_action_mark mark;
-struct action_rss_data *rss;
-
-mark.id = info->flow_mark;
-add_flow_action(, RTE_FLOW_ACTION_TYPE_MARK, );
-
-rss = add_flow_rss_action(, netdev);
-add_flow_action(, RTE_FLOW_ACTION_TYPE_END, NULL);
+add_flow_mark_rss_actions(, info->flow_mark, netdev);
 
 flow = netdev_dpdk_rte_flow_create(netdev, _attr,
patterns.items,
actions.actions, );
 
-free(rss);
 if (!flow) {
 VLOG_ERR("%s: rte flow creat error: %u : message : %s\n",
  netdev_get_name(netdev), error.type, error.message);
@@ -624,7 +636,7 @@ netdev_offload_dpdk_add_flow(struct netdev *netdev,
 
 out:
 free(patterns.items);
-free(actions.actions);
+free_flow_actions();
 return ret;
 }
 
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 10/18] dpif-netdev: Update offloaded flows statistics

2020-01-08 Thread Eli Britstein
From: Ophir Munk 

In case a flow is HW offloaded, packets do not reach the SW, thus not
counted for statistics. Use netdev flow get API in order to update the
statistics of flows by the HW statistics.

Co-authored-by: Eli Britstein 
Signed-off-by: Ophir Munk 
Reviewed-by: Oz Shlomo 
Signed-off-by: Eli Britstein 
---
 lib/dpif-netdev.c | 79 ++-
 1 file changed, 66 insertions(+), 13 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 24218210d..7ec217f39 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -3030,10 +3030,49 @@ dp_netdev_pmd_find_flow(const struct 
dp_netdev_pmd_thread *pmd,
 return NULL;
 }
 
+static bool
+dpif_netdev_get_flow_offload_status(const struct dp_netdev *dp,
+const struct dp_netdev_flow *netdev_flow,
+struct dpif_flow_stats *stats,
+struct dpif_flow_attrs *attrs)
+{
+struct nlattr *actions;
+struct ofpbuf wbuffer;
+struct netdev *netdev;
+struct match match;
+
+int ret = 0;
+
+if (!netdev_is_flow_api_enabled()) {
+return false;
+}
+
+netdev = netdev_ports_get(netdev_flow->flow.in_port.odp_port, dp->class);
+if (!netdev) {
+return false;
+}
+/* Taking a global 'port_mutex' to fulfill thread safety
+ * restrictions for the netdev-offload-dpdk module. */
+ovs_mutex_lock(>port_mutex);
+ret = netdev_flow_get(netdev, , , _flow->mega_ufid,
+  stats, attrs, );
+ovs_mutex_unlock(>port_mutex);
+netdev_close(netdev);
+if (ret) {
+return false;
+}
+
+return true;
+}
+
 static void
-get_dpif_flow_stats(const struct dp_netdev_flow *netdev_flow_,
-struct dpif_flow_stats *stats)
+get_dpif_flow_status(const struct dp_netdev *dp,
+ const struct dp_netdev_flow *netdev_flow_,
+ struct dpif_flow_stats *stats,
+ struct dpif_flow_attrs *attrs)
 {
+struct dpif_flow_stats offload_stats;
+struct dpif_flow_attrs offload_attrs;
 struct dp_netdev_flow *netdev_flow;
 unsigned long long n;
 long long used;
@@ -3049,6 +3088,21 @@ get_dpif_flow_stats(const struct dp_netdev_flow 
*netdev_flow_,
 stats->used = used;
 atomic_read_relaxed(_flow->stats.tcp_flags, );
 stats->tcp_flags = flags;
+
+if (dpif_netdev_get_flow_offload_status(dp, netdev_flow,
+_stats, _attrs)) {
+stats->n_packets += offload_stats.n_packets;
+stats->n_bytes += offload_stats.n_bytes;
+stats->used = MAX(stats->used, offload_stats.used);
+stats->tcp_flags |= offload_stats.tcp_flags;
+if (attrs) {
+attrs->offloaded = offload_attrs.offloaded;
+attrs->dp_layer = offload_attrs.dp_layer;
+}
+} else if (attrs) {
+attrs->offloaded = false;
+attrs->dp_layer = "ovs";
+}
 }
 
 /* Converts to the dpif_flow format, using 'key_buf' and 'mask_buf' for
@@ -3056,7 +3110,8 @@ get_dpif_flow_stats(const struct dp_netdev_flow 
*netdev_flow_,
  * 'mask_buf'. Actions will be returned without copying, by relying on RCU to
  * protect them. */
 static void
-dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow *netdev_flow,
+dp_netdev_flow_to_dpif_flow(const struct dp_netdev *dp,
+const struct dp_netdev_flow *netdev_flow,
 struct ofpbuf *key_buf, struct ofpbuf *mask_buf,
 struct dpif_flow *flow, bool terse)
 {
@@ -3099,10 +3154,8 @@ dp_netdev_flow_to_dpif_flow(const struct dp_netdev_flow 
*netdev_flow,
 flow->ufid = netdev_flow->ufid;
 flow->ufid_present = true;
 flow->pmd_id = netdev_flow->pmd_id;
-get_dpif_flow_stats(netdev_flow, >stats);
 
-flow->attrs.offloaded = false;
-flow->attrs.dp_layer = "ovs";
+get_dpif_flow_status(dp, netdev_flow, >stats, >attrs);
 }
 
 static int
@@ -3205,8 +3258,8 @@ dpif_netdev_flow_get(const struct dpif *dpif, const 
struct dpif_flow_get *get)
 netdev_flow = dp_netdev_pmd_find_flow(pmd, get->ufid, get->key,
   get->key_len);
 if (netdev_flow) {
-dp_netdev_flow_to_dpif_flow(netdev_flow, get->buffer, get->buffer,
-get->flow, false);
+dp_netdev_flow_to_dpif_flow(dp, netdev_flow, get->buffer,
+get->buffer, get->flow, false);
 error = 0;
 break;
 } else {
@@ -3381,7 +3434,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
   put->actions, put->actions_len);
 
 if (stats) {
-get_dpif_flow_stats(netdev_flow, stats);
+get_dpif_flow_status(pmd->dp, netdev_flow, stats, NULL);
 }
 if 

[ovs-dev] [PATCH V7 14/18] netdev-offload-dpdk: Support offload of output action

2020-01-08 Thread Eli Britstein
Support offload of output action, also configuring count action for
allowing query statistics of HW offloaded flows.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 Documentation/howto/dpdk.rst | 17 ++---
 NEWS |  1 +
 lib/netdev-offload-dpdk.c| 86 +---
 3 files changed, 96 insertions(+), 8 deletions(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index 766a7950c..f0fe2bef7 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -370,19 +370,28 @@ The flow hardware offload is disabled by default and can 
be enabled by::
 
 $ ovs-vsctl set Open_vSwitch . other_config:hw-offload=true
 
-So far only partial flow offload is implemented. Moreover, it only works
-with PMD drivers have the rte_flow action "MARK + RSS" support.
+Matches and actions are programmed into HW to achieve full offload of
+the flow. If not all actions are supported, fallback to partial flow
+offload (offloading matches only). Moreover, it only works with PMD
+drivers that support the configured rte_flow actions.
+Partial flow offload requires support of "MARK + RSS" actions. Full
+hardware offload requires support of the actions listed below.
 
 The validated NICs are:
 
 - Mellanox (ConnectX-4, ConnectX-4 Lx, ConnectX-5)
 - Napatech (NT200B01)
 
-Supported protocols for hardware offload are:
+Supported protocols for hardware offload matches are:
+
 - L2: Ethernet, VLAN
-- L3: IPv4, IPv6
+- L3: IPv4
 - L4: TCP, UDP, SCTP, ICMP
 
+Supported actions for hardware offload are:
+
+- Output.
+
 Further Reading
 ---
 
diff --git a/NEWS b/NEWS
index 8e89f77f6..7f31ad80b 100644
--- a/NEWS
+++ b/NEWS
@@ -34,6 +34,7 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
+ * Add hardware offload support for output actions (experimental).
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
  partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
  type filter supports new filters: "dpdk" and "partially-offloaded".
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 55e266072..4fc13b149 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -347,6 +347,26 @@ dump_flow_action(struct ds *s, const struct 
rte_flow_action *actions)
 } else {
 ds_put_cstr(s, "  RSS = null\n");
 }
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_COUNT) {
+const struct rte_flow_action_count *count = actions->conf;
+
+ds_put_cstr(s, "rte flow count action:\n");
+if (count) {
+ds_put_format(s, "  Count: shared=%d, id=%d\n", count->shared,
+  count->id);
+} else {
+ds_put_cstr(s, "  Count = null\n");
+}
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_PORT_ID) {
+const struct rte_flow_action_port_id *port_id = actions->conf;
+
+ds_put_cstr(s, "rte flow port-id action:\n");
+if (port_id) {
+ds_put_format(s, "  Port-id: original=%d, id=%d\n",
+  port_id->original, port_id->id);
+} else {
+ds_put_cstr(s, "  Port-id = null\n");
+}
 } else {
 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
 }
@@ -724,19 +744,77 @@ netdev_offload_dpdk_mark_rss(struct flow_patterns 
*patterns,
 return flow;
 }
 
+static void
+add_count_action(struct flow_actions *actions)
+{
+struct rte_flow_action_count *count = xzalloc(sizeof *count);
+
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_COUNT, count);
+}
+
 static int
-parse_flow_actions(struct netdev *netdev OVS_UNUSED,
+add_port_id_action(struct flow_actions *actions,
+   struct netdev *outdev)
+{
+struct rte_flow_action_port_id *port_id;
+int outdev_id;
+
+outdev_id = netdev_dpdk_get_port_id(outdev);
+if (outdev_id < 0) {
+return -1;
+}
+port_id = xzalloc(sizeof *port_id);
+port_id->id = outdev_id;
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_PORT_ID, port_id);
+return 0;
+}
+
+static int
+add_output_action(struct netdev *netdev,
+  struct flow_actions *actions,
+  const struct nlattr *nla,
+  struct offload_info *info)
+{
+struct netdev *outdev;
+odp_port_t port;
+int ret = 0;
+
+port = nl_attr_get_odp_port(nla);
+outdev = netdev_ports_get(port, info->dpif_class);
+if (outdev == NULL) {
+VLOG_DBG_RL(, "Cannot find netdev for odp port %"PRIu32, port);
+return -1;
+}
+if (!netdev_flow_api_equals(netdev, outdev) ||
+add_port_id_action(actions, outdev)) {
+VLOG_DBG_RL(, "%s: Output to port \'%s\' cannot be offloaded.",
+

[ovs-dev] [PATCH V7 15/18] netdev-offload-dpdk: Support offload of drop action

2020-01-08 Thread Eli Britstein
Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 Documentation/howto/dpdk.rst | 1 +
 NEWS | 2 +-
 lib/netdev-offload-dpdk.c| 4 
 3 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/howto/dpdk.rst b/Documentation/howto/dpdk.rst
index f0fe2bef7..d9de7bedd 100644
--- a/Documentation/howto/dpdk.rst
+++ b/Documentation/howto/dpdk.rst
@@ -391,6 +391,7 @@ Supported protocols for hardware offload matches are:
 Supported actions for hardware offload are:
 
 - Output.
+- Drop.
 
 Further Reading
 ---
diff --git a/NEWS b/NEWS
index 7f31ad80b..ffa6a91be 100644
--- a/NEWS
+++ b/NEWS
@@ -34,7 +34,7 @@ Post-v2.12.0
interval is increased to 60 seconds for the connection to the
replication server. This value is configurable with the unixctl
command - ovsdb-server/set-active-ovsdb-server-probe-interval.
- * Add hardware offload support for output actions (experimental).
+ * Add hardware offload support for output and drop actions (experimental).
- 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
  partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
  type filter supports new filters: "dpdk" and "partially-offloaded".
diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 4fc13b149..026e77756 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -367,6 +367,8 @@ dump_flow_action(struct ds *s, const struct rte_flow_action 
*actions)
 } else {
 ds_put_cstr(s, "  Port-id = null\n");
 }
+} else if (actions->type == RTE_FLOW_ACTION_TYPE_DROP) {
+ds_put_cstr(s, "rte flow drop action\n");
 } else {
 ds_put_format(s, "unknown rte flow action (%d)\n", actions->type);
 }
@@ -811,6 +813,8 @@ parse_flow_actions(struct netdev *netdev,
 if (add_output_action(netdev, actions, nla, info)) {
 return -1;
 }
+} else if (nl_attr_type(nla) == OVS_ACTION_ATTR_DROP) {
+add_flow_action(actions, RTE_FLOW_ACTION_TYPE_DROP, NULL);
 } else {
 VLOG_DBG_RL(, "Unsupported action type %d", nl_attr_type(nla));
 return -1;
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 05/18] netdev-dpdk: Introduce rte flow query function

2020-01-08 Thread Eli Britstein
Introduce a rte flow query function as a pre-step towards reading HW
statistics of fully offloaded flows.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-dpdk.c | 30 ++
 lib/netdev-dpdk.h |  6 ++
 2 files changed, 36 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 8198a0b7d..4ff5a70a8 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -4541,6 +4541,36 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
 return flow;
 }
 
+int
+netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
+ struct rte_flow *rte_flow,
+ struct rte_flow_query_count *query,
+ struct rte_flow_error *error)
+{
+struct rte_flow_action_count count = { .shared = 0, .id = 0 };
+const struct rte_flow_action actions[] = {
+{
+.type = RTE_FLOW_ACTION_TYPE_COUNT,
+.conf = ,
+},
+{
+.type = RTE_FLOW_ACTION_TYPE_END,
+},
+};
+struct netdev_dpdk *dev;
+int ret;
+
+if (!is_dpdk_class(netdev->netdev_class)) {
+return -1;
+}
+
+dev = netdev_dpdk_cast(netdev);
+ovs_mutex_lock(>mutex);
+ret = rte_flow_query(dev->port_id, rte_flow, actions, query, error);
+ovs_mutex_unlock(>mutex);
+return ret;
+}
+
 #define NETDEV_DPDK_CLASS_COMMON\
 .is_pmd = true, \
 .alloc = netdev_dpdk_alloc, \
diff --git a/lib/netdev-dpdk.h b/lib/netdev-dpdk.h
index 60631c4f0..59919a89a 100644
--- a/lib/netdev-dpdk.h
+++ b/lib/netdev-dpdk.h
@@ -31,6 +31,7 @@ struct rte_flow_error;
 struct rte_flow_attr;
 struct rte_flow_item;
 struct rte_flow_action;
+struct rte_flow_query_count;
 
 void netdev_dpdk_register(void);
 void free_dpdk_buf(struct dp_packet *);
@@ -47,6 +48,11 @@ netdev_dpdk_rte_flow_create(struct netdev *netdev,
 const struct rte_flow_item *items,
 const struct rte_flow_action *actions,
 struct rte_flow_error *error);
+int
+netdev_dpdk_rte_flow_query_count(struct netdev *netdev,
+ struct rte_flow *rte_flow,
+ struct rte_flow_query_count *query,
+ struct rte_flow_error *error);
 
 #else
 
-- 
2.14.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH V7 03/18] netdev-offload-dpdk: Dynamically allocate pattern items

2020-01-08 Thread Eli Britstein
Instead of statically allocated pattern items on the stack, dynamically
allocate only the required items while parsing the matches, to simplify
the parsing and make it self-contained, without need of external types,
making it easier to support more matches in the future.

Signed-off-by: Eli Britstein 
Reviewed-by: Oz Shlomo 
---
 lib/netdev-offload-dpdk.c | 206 ++
 1 file changed, 118 insertions(+), 88 deletions(-)

diff --git a/lib/netdev-offload-dpdk.c b/lib/netdev-offload-dpdk.c
index 89828224a..a5bc4ad33 100644
--- a/lib/netdev-offload-dpdk.c
+++ b/lib/netdev-offload-dpdk.c
@@ -374,6 +374,24 @@ add_flow_action(struct flow_actions *actions, enum 
rte_flow_action_type type,
 actions->cnt++;
 }
 
+static void
+free_flow_patterns(struct flow_patterns *patterns)
+{
+int i;
+
+for (i = 0; i < patterns->cnt; i++) {
+if (patterns->items[i].spec) {
+free(CONST_CAST(void *, patterns->items[i].spec));
+}
+if (patterns->items[i].mask) {
+free(CONST_CAST(void *, patterns->items[i].mask));
+}
+}
+free(patterns->items);
+patterns->items = NULL;
+patterns->cnt = 0;
+}
+
 static void
 free_flow_actions(struct flow_actions *actions)
 {
@@ -389,39 +407,30 @@ free_flow_actions(struct flow_actions *actions)
 actions->cnt = 0;
 }
 
-struct flow_items {
-struct rte_flow_item_eth  eth;
-struct rte_flow_item_vlan vlan;
-struct rte_flow_item_ipv4 ipv4;
-union {
-struct rte_flow_item_tcp  tcp;
-struct rte_flow_item_udp  udp;
-struct rte_flow_item_sctp sctp;
-struct rte_flow_item_icmp icmp;
-};
-};
-
 static int
 parse_flow_match(struct flow_patterns *patterns,
- struct flow_items *spec,
- struct flow_items *mask,
  const struct match *match)
 {
+uint8_t *next_proto_mask = NULL;
 uint8_t proto = 0;
 
 /* Eth */
 if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
 !eth_addr_is_zero(match->wc.masks.dl_dst)) {
-memcpy(>eth.dst, >flow.dl_dst, sizeof spec->eth.dst);
-memcpy(>eth.src, >flow.dl_src, sizeof spec->eth.src);
-spec->eth.type = match->flow.dl_type;
+struct rte_flow_item_eth *spec, *mask;
+
+spec = xzalloc(sizeof *spec);
+mask = xzalloc(sizeof *mask);
+
+memcpy(>dst, >flow.dl_dst, sizeof spec->dst);
+memcpy(>src, >flow.dl_src, sizeof spec->src);
+spec->type = match->flow.dl_type;
 
-memcpy(>eth.dst, >wc.masks.dl_dst, sizeof mask->eth.dst);
-memcpy(>eth.src, >wc.masks.dl_src, sizeof mask->eth.src);
-mask->eth.type = match->wc.masks.dl_type;
+memcpy(>dst, >wc.masks.dl_dst, sizeof mask->dst);
+memcpy(>src, >wc.masks.dl_src, sizeof mask->src);
+mask->type = match->wc.masks.dl_type;
 
-add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH,
- >eth, >eth);
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_ETH, spec, mask);
 } else {
 /*
  * If user specifies a flow (like UDP flow) without L2 patterns,
@@ -435,36 +444,45 @@ parse_flow_match(struct flow_patterns *patterns,
 
 /* VLAN */
 if (match->wc.masks.vlans[0].tci && match->flow.vlans[0].tci) {
-spec->vlan.tci  = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
-mask->vlan.tci  = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
+struct rte_flow_item_vlan *spec, *mask;
+
+spec = xzalloc(sizeof *spec);
+mask = xzalloc(sizeof *mask);
+
+spec->tci = match->flow.vlans[0].tci & ~htons(VLAN_CFI);
+mask->tci = match->wc.masks.vlans[0].tci & ~htons(VLAN_CFI);
 
 /* Match any protocols. */
-mask->vlan.inner_type = 0;
+mask->inner_type = 0;
 
-add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN,
- >vlan, >vlan);
+add_flow_pattern(patterns, RTE_FLOW_ITEM_TYPE_VLAN, spec, mask);
 }
 
 /* IP v4 */
 if (match->flow.dl_type == htons(ETH_TYPE_IP)) {
-spec->ipv4.hdr.type_of_service = match->flow.nw_tos;
-spec->ipv4.hdr.time_to_live= match->flow.nw_ttl;
-spec->ipv4.hdr.next_proto_id   = match->flow.nw_proto;
-spec->ipv4.hdr.src_addr= match->flow.nw_src;
-spec->ipv4.hdr.dst_addr= match->flow.nw_dst;
+struct rte_flow_item_ipv4 *spec, *mask;
+
+spec = xzalloc(sizeof *spec);
+mask = xzalloc(sizeof *mask);
+
+spec->hdr.type_of_service = match->flow.nw_tos;
+spec->hdr.time_to_live= match->flow.nw_ttl;
+spec->hdr.next_proto_id   = match->flow.nw_proto;
+spec->hdr.src_addr= match->flow.nw_src;
+spec->hdr.dst_addr= match->flow.nw_dst;
 
-mask->ipv4.hdr.type_of_service = match->wc.masks.nw_tos;
-mask->ipv4.hdr.time_to_live= match->wc.masks.nw_ttl;
-mask->ipv4.hdr.next_proto_id   = 

[ovs-dev] [ovs-dev v3] dpif-netdev: Allow to set max capacity of flow on netdev.

2020-01-08 Thread xiangxia . m . yue
From: Tonghao Zhang 

For installing more than MAX_FLOWS (65536) flows to netdev datapath.
Add the ovs-appctl subcommand "dpif-netdev/pmd-set-max-flow" which
can change the flow number which netdev datapath support.

Signed-off-by: Tonghao Zhang 
---
v3:
* change the UINT_MAX to UINT32_MAX
* add information to NEWS/dpif-netdev-unixctl.man
* simply the codes and remove new-line at the end 
v2:
* change int type to atomic_uint32_t
* check max flow number is whether valid (0 < max-flow < UINT_MAX).
---
 NEWS|  3 +++
 lib/dpif-netdev-unixctl.man |  4 +++
 lib/dpif-netdev.c   | 49 +++--
 3 files changed, 54 insertions(+), 2 deletions(-)

diff --git a/NEWS b/NEWS
index 965facaf852d..55bd1255104d 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,9 @@ Post-v2.12.0
  * Add option to enable, disable and query TCP sequence checking in
conntrack.
  * Add support for conntrack zone limits.
+ * New ovs-appctl "dpif-netdev/pmd-set-max-flow" and
+   "dpif-netdev/pmd-show-max-flow" commands for userspace datapath
+   change the max flows which support.
- AF_XDP:
  * New option 'use-need-wakeup' for netdev-afxdp to control enabling
of corresponding 'need_wakeup' flag in AF_XDP rings.  Enabled by default
diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
index 6c54f6f9cc3b..92cbcd07e984 100644
--- a/lib/dpif-netdev-unixctl.man
+++ b/lib/dpif-netdev-unixctl.man
@@ -217,3 +217,7 @@ with port names, which this thread polls.
 .
 .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
 Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current usage.
+.IP "\fBdpif-netdev/pmd-set-max-flow\fR [\fInumber\fR]"
+Sets the max flow which netdev datapath supports (default 65536).
+.IP "\fBdpif-netdev/pmd-show-max-flow"
+Shows the current max flow which netdev datapath supports.
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 24218210d4a8..f789ccf6cc8b 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,7 +97,6 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 #define DEFAULT_TX_FLUSH_INTERVAL 0
 
 /* Configuration parameters. */
-enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
 enum { MAX_METERS = 65536 };/* Maximum number of meters. */
 enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */
 enum { N_METER_LOCKS = 64 };/* Maximum number of meters. */
@@ -116,6 +115,9 @@ COVERAGE_DEFINE(datapath_drop_rx_invalid_packet);
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
 
+/* Maximum number of flows in flow table. */
+static atomic_uint32_t netdev_max_flow = ATOMIC_VAR_INIT(65536);
+
 /* Contains all 'struct dp_netdev's. */
 static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
 = SHASH_INITIALIZER(_netdevs);
@@ -1123,6 +1125,39 @@ pmd_info_show_perf(struct ds *reply,
 }
 }
 
+static void
+dpif_netdev_set_max_flow(struct unixctl_conn *conn,
+ int argc OVS_UNUSED,
+ const char *argv[],
+ void *aux OVS_UNUSED)
+{
+long long max_flow = atoll(argv[1]);
+
+if (max_flow <= 0 || max_flow >= UINT32_MAX) {
+unixctl_command_reply_error(conn,
+"max-flow should: > 0 and < UINT32_MAX");
+return;
+}
+
+atomic_store_relaxed(_max_flow, max_flow);
+unixctl_command_reply(conn, NULL);
+}
+
+static void
+dpif_netdev_show_max_flow(struct unixctl_conn *conn,
+  int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED,
+  void *aux OVS_UNUSED)
+{
+uint32_t max_flow;
+
+atomic_read_relaxed(_max_flow, _flow);
+
+char *reply = xasprintf("%u", max_flow);
+unixctl_command_reply(conn, reply);
+free(reply);
+}
+
 static int
 compare_poll_list(const void *a_, const void *b_)
 {
@@ -1427,6 +1462,14 @@ dpif_netdev_init(void)
  "[-us usec] [-q qlen]",
  0, 10, pmd_perf_log_set_cmd,
  NULL);
+unixctl_command_register("dpif-netdev/pmd-set-max-flow",
+ "number",
+ 1, 1, dpif_netdev_set_max_flow,
+ NULL);
+unixctl_command_register("dpif-netdev/pmd-show-max-flow",
+ "",
+ 0, 0, dpif_netdev_show_max_flow,
+ NULL);
 return 0;
 }
 
@@ -3356,7 +3399,9 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
 if (!netdev_flow) {
 if (put->flags & DPIF_FP_CREATE) {
-if (cmap_count(>flow_table) < MAX_FLOWS) {
+uint32_t max_flow;
+atomic_read_relaxed(_max_flow, _flow);
+if 

Re: [ovs-dev] [PATCH v1 ovn 0/1] Forwarding group to load balance l2 traffic with liveness detection

2020-01-08 Thread Numan Siddique
On Tue, Jan 7, 2020 at 3:55 AM Manoj Sharma  wrote:
>
> A forwarding group is an aggregation of logical switch ports of a
> logical switch to load balance traffic across the ports. It also
> detects the liveness if the logical switch ports are realized as
> OVN tunnel ports on the physical topology.
>
> In the below logical topology diagram, the logical switch has two ports
> connected to chassis / external routers R1 and R2. The logical router needs
> to send traffic to an external network that is connected through R1 and R2.
>
> ++
>  +--+ R1 |*
> /   ++  ** **
>   +--++--+ / lsp1  * *
>   | Logical  ||   Logical|/   * External  *
>   | Router   ++   switch X*  Network  *
>   |  ||  |\   *   *
>   +--++--+ \ lsp2  * *
>  ^  \   ++  ** **
>  |   +--+ R2 |*
>  |  ++
>fwd_group -> (lsp1, lsp2)
>
> In the absence of forwarding group, the logical router will have unicast
> route to point to either R1 or R2. In case of R1 or R2 going down, it will
> require control plane's intervention to update the route to point to proper
> nexthop.
>
> With forwarding group, a virtual IP (VIP) and virtual MAC (VMAC) address
> are configured on the forwarding group. The logical router points to the
> forwarding group's VIP as the nexthop for hosts behind R1 and R2.
>
> [root@fwd-group]# ovn-nbctl fwd-group-add fwd ls1 VIP_1 VMAC_1 lsp1 lsp2
>
> [root@fwd-group]# ovn-nbctl fwd-group-list
> UUIDFWD_GROUP  VIPVMAC   CHILD_PORTS
> UUID_1fwd VIP_1  VMAC_1   lsp1 lsp2
>
> [root@fwd-group]# ovn-nbctl lr-route-list lr1
> IPv4 Routes
> external_host_prefix/prefix_lenVIP_1 dst-ip
>
> The logical switch will install an ARP responder rule to reply with VMAC
> as the MAC address for ARP requests for VIP. It will also install a MAC
> lookup rule for VMAC with action to load balance across the logical switch
> ports of the forwarding group.
>
> Datapath: "ls1" Pipeline: ingress
> table=10(ls_in_arp_rsp  ), priority=50   , match=(arp.tpa == VIP_1 &&
> arp.op == 1), action=(eth.dst = eth.src; eth.src = VMAC_1; arp.op = 2;
> /* ARP reply */ arp.tha = arp.sha; arp.sha = VMAC_1; arp.tpa = arp.spa;
> arp.spa = VIP; outport = inport; flags.loopback = 1; output;)
>
> table=13(ls_in_l2_lkup  ), priority=50   , match=(eth.dst == VMAC_1),
> action=(fwd_group("lsp1","lsp2");)
>
> In the physical topology, OVN managed hypervisors are connected to R1 and
> R2 through overlay tunnels. The logical flow's "fwd_group" action mentioned
> above, gets translated to openflow group type "select" with one bucket for
> each logical switch port.
>
> cookie=0x0, duration=16.869s, table=29, n_packets=4, n_bytes=392, idle_age=0,
> priority=111,metadata=0x9,dl_dst=VMAC_1 actions=group:1
>
> group_id=1,type=select,selection_method=dp_hash,
> bucket=actions=load:0x2->NXM_NX_REG15[0..15], resubmit(,32),
> bucket=actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32)
>
> where 0x2 and 0x3 are port tunnel keys of lsp1 and lsp2.
>
> The openflow group type "select" with selection method "dp_hash" load
> balances traffic based on source and destination Ethernet address, VLAN ID,
> Ethernet type, IPv4/v6 source and destination address and protocol, and for
> TCP and SCTP only, the source and destination ports.
>
> To detect path failure between OVN managed hypervisors and (R1, R2), BFD is
> enabled on the tunnel interfaces. The openflow group is modified to include
> watch_port for liveness detection of a port. To enable liveness, --liveness
> option must be specified while configuring the forwarding group.
>
> group_id=1,type=select,selection_method=dp_hash,
>   bucket=watch_port:31,actions=load:0x2->NXM_NX_REG15[0..15],resubmit(,32),
>   bucket=watch_port:32,actions=load:0x3->NXM_NX_REG15[0..15],resubmit(,32)
>
> Where 31 and 32 are ovs port numbers for the tunnel interfaces connecting
> to R1 and R2.
>
> If the BFD forwarding status is down for any of the tunnels, the
> corresponding bucket will not be selected for packet forwarding.
>
> Signed-off-by: Manoj Sharma 


Hi Manoj,

Thanks for the patch. I didn't look into the complete patch. I have
initial few comments.

The patch fails to compile when --enable-Werror and --enable-sparse
are enabled during
configuration.


 -I ../ovn -I ./include -I
/home/nusiddiq/workspace_cpp/openvswitch/ovs/include -I
/home/nusiddiq/workspace_cpp/openvswitch/ovs/_gcc/include -I

Re: [ovs-dev] [PATCH ovn] OVN container scripts: Support for cluster mode

2020-01-08 Thread Numan Siddique
On Wed, Jan 8, 2020 at 7:02 AM  wrote:
>
> From: Aliasgar Ginwala 
>
> 1. Container scripts for starting ovn central node
>containers in HA using cluster mode
> 2. Update documentation about the same.
>
> Signed-off-by: Aliasgar Ginwala 

Thanks. I applied this patch to master. I didn't test it.

Thanks
Numan

> ---
>  Documentation/intro/install/general.rst | 34 +-
>  utilities/docker/start-ovn  | 46 +++--
>  2 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/Documentation/intro/install/general.rst 
> b/Documentation/intro/install/general.rst
> index 52bfd7d18..4df1a5538 100644
> --- a/Documentation/intro/install/general.rst
> +++ b/Documentation/intro/install/general.rst
> @@ -449,6 +449,38 @@ Start OVN containers using below command::
>  $ docker run -itd --net=host --name=ovn-northd \
>: ovn-northd-tcp
>
> +Start OVN containers in cluster mode for a 3 node cluster using below command
> +on node1::
> +
> +$ docker run -e "host_ip=" -e "nb_db_port=" -itd \
> +  --name=ovn-nb-raft --net=host --privileged : \
> +  ovn-nb-cluster-create
> +
> +$ docker run -e "host_ip=" -e "sb_db_port=" -itd \
> +  --name=ovn-sb-raft --net=host --privileged : \
> +  ovn-sb-cluster-create
> +
> +$ docker run -e "OVN_NB_DB=tcp::6641,tcp::6641,\
> +  tcp::6641" -e "OVN_SB_DB=tcp::6642,tcp::6642,\
> +  tcp::6642" -itd --name=ovn-northd-raft : \
> +  ovn-northd-cluster
> +
> +Start OVN containers in cluster mode using below command on node2 and node3 \
> +to make them join the peer using below command::
> +
> +$ docker run -e "host_ip=" -e "remote_host=" \
> +  -e "nb_db_port=" -itd --name=ovn-nb-raft --net=host \
> +  --privileged : ovn-nb-cluster-join
> +
> +$ docker run -e "host_ip=" -e "remote_host=" \
> +  -e "sb_db_port=" -itd --name=ovn-sb-raft --net=host \
> +  --privileged : ovn-sb-cluster-join
> +
> +$ docker run -e "OVN_NB_DB=tcp::6641,tcp::6641,\
> +  tcp::6641" -e "OVN_SB_DB=tcp::6642,tcp::6642,\
> +  tcp::6642" -itd --name=ovn-northd-raft : \
> +  ovn-northd-cluster
> +
>  Start OVN containers using unix socket::
>
>  $ docker run -itd --net=host --name=ovn-nb \
> @@ -465,7 +497,7 @@ Start OVN containers using unix socket::
>
>  .. note::
>  Current ovn central components comes up in docker image in a standalone
> -mode with protocol tcp.
> +and cluster mode with protocol tcp.
>
>  The debian docker file use ubuntu 16.04 as a base image for reference.
>
> diff --git a/utilities/docker/start-ovn b/utilities/docker/start-ovn
> index fbdd2af91..51e5162c5 100755
> --- a/utilities/docker/start-ovn
> +++ b/utilities/docker/start-ovn
> @@ -22,12 +22,34 @@ case $1 in
>--ovnsb-db="unix:/var/run/ovn/ovnsb_db.sock" \
>--log-file=/var/log/ovn/ovn-northd.log
>  ;;
> +"ovn-northd-cluster") ovn-northd --pidfile \
> +  --ovnnb-db=$OVN_NB_DB \
> +  --ovnsb-db=$OVN_SB_DB \
> +  --log-file=/var/log/ovn/ovn-northd.log
> +;;
>  "ovn-nb-tcp") source /etc/ovn/ovn_default_nb_port
>/usr/share/ovn/scripts/ovn-ctl start_ovsdb
>ovn-nbctl set-connection ptcp:$nb_db_port
>/usr/share/ovn/scripts/ovn-ctl stop_ovsdb
>/usr/share/ovn/scripts/ovn-ctl run_nb_ovsdb
>  ;;
> +"ovn-nb-cluster-create") /usr/share/ovn/scripts/ovn-ctl \
> + --db-nb-addr=$host_ip \
> + --db-nb-cluster-local-addr=$host_ip \
> + start_nb_ovsdb
> + ovn-nbctl set-connection ptcp:$nb_db_port
> + /usr/share/ovn/scripts/ovn-ctl stop_nb_ovsdb
> + /usr/share/ovn/scripts/ovn-ctl \
> + --db-nb-addr=$host_ip \
> + --db-nb-cluster-local-addr=$host_ip \
> + run_nb_ovsdb
> +;;
> +"ovn-nb-cluster-join") /usr/share/ovn/scripts/ovn-ctl \
> +   --db-nb-addr=$host_ip \
> +   --db-nb-cluster-local-addr=$host_ip \
> +   --db-nb-cluster-remote-addr=$remote_host \
> +   run_nb_ovsdb
> +;;
>  "ovn-sb-tcp") source /etc/ovn/ovn_default_sb_port
>/usr/share/ovn/scripts/ovn-ctl start_ovsdb
>ovn-sbctl set-connection ptcp:$sb_db_port
> @@ -42,8 +64,28 @@ case $1 in
>--ovnsb-db="tcp:$northd_host:$sb_db_port" \
>--log-file=/var/log/ovn/ovn-northd.log
>  ;;
> +"ovn-sb-cluster-create") /usr/share/ovn/scripts/ovn-ctl \
> + 

Re: [ovs-dev] how to use ovn-scale-test when i do not use git repo

2020-01-08 Thread shenjian . gc


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Investment Seed Capital

2020-01-08 Thread Kyle Smith
Attn:CEO
 

I have an investor looking to invest in entrepreneurial teams with big ideas 
and a need for Seed Capital to turn their business or ideas into great 
Companies. 
He has funds available for Investment and want them invested under you or your 
Company strict guidance. For more details, please reply to this email so we can 
discuss further.
 
Yours faithfully.
 Kyle Smith
 Mobile Whatsapp  + 1 (646) 441 8989  (USA)
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] netdev_afxdp: Detects combined channels and aborts wrong config

2020-01-08 Thread Yi-Hung Wei
Hi Ben,

After taking closer look at different driver implementations, I found
that the use of combined channels is actually driver dependent.

>From ethtool (8), a channel is an IRQ and the set of queues that can
trigger that IRQ.  I tend to think channel as rx/tx queues from
software's point of view.  There are 4 types of channels that ethtool
can configure, rx, tx, other, and combined.

For Intel NICs, (at least for NICs that use ixgbe, i40 driver), we can
only configure channels through 'combined' parameter. It will set both
rx and tx channel to N with the following command.
$ ethool -L eth0 combined N

However, it does not support configuring rx and tx channel separately.
We can only set/get number of combined channel, but not rx or tx
channel.
For example,
$ ethtool -l eth0
Channel parameters for eth0:
Pre-set maximums:
RX:0
TX:0
Other:1
Combined:63
Current hardware settings:
RX:0
TX:0
Other:1
Combined:8

On the other hand, NICs that use Broadcom tg3 drivers can configure
its rx and tx channel individually, but can not set with 'combined'
parameter. For Mellanox, mlx4 driver supports set/get rx/tx channels,
but mlx5 driver can only configured through combined channel.

Another ethtool example with Broadcom NetXtreme BCM5720 NIC.
$ ethtool -L eno1 rx 2 tx 1
$ ethtool -l eno1
Channel parameters for eno1:
Pre-set maximums:
RX: 4
TX: 4
Other:  0
Combined:   0
Current hardware settings:
RX: 2
TX: 1
Other:  0
Combined:   0

Back to this patch, what I was trying to do is to make sure that the
number of rx channels reported from ethtool is equal to n_rxq in
AF_XDP's port setting.  Since, I was using a testbed with Intel NIC, I
assume to get # of rx channels from 'combined'.  To support other
types of NICs, in the next version, I will first compare n_rxq with
'rx'. In case of 'rx' is 0,  'combined' will be used.  I will update
the related documentation as well.

Thanks,

-Yi-Hung


On Mon, Jan 6, 2020 at 2:15 PM Ben Pfaff  wrote:
>
> On Mon, Dec 23, 2019 at 11:33:57AM -0800, Yi-Hung Wei wrote:
> > This patches detects the number of combined channels on a AF_XDP port
> > via using ethtool interface.  If the number of combined channels
> > is different from the n_rxq, netdev_afxdp will not be able to get
> > all the packets from that port.  Thus, abort the port setup when
> > the case is detected.
> >
> > Signed-off-by: Yi-Hung Wei 
> > ---
> > Travis CI: https://travis-ci.org/YiHungWei/ovs/builds/627972465
>
> I don't know what a combined channel is.  Should the documentation talk
> about this?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn v4 0/2] Add a way to delete QoS directly

2020-01-08 Thread taoyunxi...@cmss.chinamobile.com
Hi Han,
If you have time, Could you review this patch or give a response.


Thanks ,
Yun 


 
From: taoyunxi...@cmss.chinamobile.com
Date: 2019-12-24 19:55
To: Han Zhou
CC: ovs-dev
Subject: Re: Re: [ovs-dev] [PATCH ovn v4 0/2] Add a way to delete QoS directly
Hi Han,
 Thanks for your review! 

 1. As for point 1 and point 4, I think it is not bad  to use external_ids 
to record association between Neutron and OVN, even we can record LS info also. 
 But it is not directly and clearly. 
 Actually, I find  almost resource tables  have "name" column, included 
ACL table, which summited by this patch[0]. The purpose of adding "name" for 
ACL, is also to make ACL rule could be easily find and check in OVN.  

 2. As for point 2, I am agreed, but do not know how to do now. 

 3. As for point 3,  I think we can record  LS info, it is not bad.  

 
 As for why I want to change "qos-del" comand,  I want to explain more  
detailed.  Hope to get your advice.

1. QoS rule resource of Neutron is not mapping any resource of OVN, and it 
just contains rate, burst, dircetion and so on. 

2. In Neutron, when we bind QoS rule to port or network, it will triger OVN 
to record in QoS table.

3. When we update QoS rule in Neutron , it will DIRECTLY update its record 
of  Neutron DB(will not wait OVN to update, because no mapping resource),  
gradually, it will update port or network which binded this QoS rule. But the 
QoS rule has already become the lasted one in Neutron DB, and networking-ovn 
can not get the old  QoS rule of Neutron.  So we can not update it exactly. 

4. For the above situaiton, we can only delete old rules that have the same 
direction as the new rules, or , we will expand the scope of deletion.
  This patch [1]  is the logic process for networking-ovn to execute QoS 
rule.  The update process is not exactly and suitable, I think the Core OVN may 
need some changes.


  [0] 
https://github.com/ovn-org/ovn/commit/17df18dac9d2fa875c2f86a54bbc84931689d42c
  [1] https://review.opendev.org/#/c/692084/
 

  Hope to have your advice.


Thanks,
Yun

From: Han Zhou
Date: 2019-12-24 01:33
To: Yunxiang Tao
CC: ovs-dev
Subject: Re: [ovs-dev] [PATCH ovn v4 0/2] Add a way to delete QoS directly


On Mon, Dec 23, 2019 at 2:37 AM Yunxiang Tao  
wrote:
>
> Currently, qos can only be deleted by indirect way which must designate
> switch or more parameters. The first patch add "name" column to QoS table,
> and make QoS table could be listed by "name" witch comand
> "ovn-nbctl list qos "name"".
>
> The second patch change the original "qos-del" to "ls-qos-del",  add
> a new "qos-del" command. By the new command, you can delete qos
> by uuid or name of the qos. It is useful. For example, we can associate
> the qos table in neutron and OVN by "name" of qos in OVN, so neutron
> could find and easily delete the corresponding qos in OVN.
>

Thanks for the patch. I have below comments:
1. There are other ways to associate QoS between Neutron and OVN. For example, 
the external-ids column can be used to add any customized key-value pairs, such 
as external_ids:neutron:qos_name = . Is this sufficient for 
your use case?
2. If we believe it is useful for qos-del command to delete qos by name/uuid, 
it is better to extend the exist command "qos-del" to handle both old and new 
formats, and we should NOT rename the existing command because it will break 
existing customer tools.
3. It is more efficient to specify the lswitch in the qos-del command so that 
it doesn't need to iterate every lswitch. Is there a use case that neutron 
needs to delete a QoS record without knowing which lswitch should it be deleted 
from?
4. QoS and ACL are implemented in similar way with similar principles. If 
"name" is needed in QoS table, probably it is also needed in ACL table. In 
other words, if we can handle ACL well without introducing the "name" column, 
probably we could do the same for QoS table without problem. What's your 
thought on this?

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Update data-type of a column in OVN-SB

2020-01-08 Thread Dhathri Purohith
Hi,



We have a requirement to update the datatype of a column in OVN-SB. However, 
this is not straight-forward.

There could be existing entries in the database that has the old type and hence 
the change will not be backward compatible.



Here is the reason for this requirement -

According to RFC 2132 (https://tools.ietf.org/html/rfc2132) , section 9.4, DHCP 
option 66 (tftp_server) can be hostname.

Current OVN implementation requires DHCP Option 66 (tftp_server) to be of type 
“ipv4”, which is restrictive and does not conform to the RFC.

Hence, we want to change the type for DHCP option 66 (tftp_server) to “str” 
instead of “ipv4”.

When we make this change, we need a way to convert the existing entries in the 
DB to “str” from “ipv4”, else the parser will complain.

For eg., consider an entry such as this in the DB –

options : {dns_server="{8.8.8.8, 9.9.9.9}", 
domain_name="\"dom.com\"", lease_time="43200", mtu="1442", router="50.0.0.1", 
server_id="50.0.0.1", server_mac="aa:bb:cc:dd:00:11", tftp_server="4.4.4.4"}

Since the parser is now expecting a “str” for “tftp_server”, it complains while 
parsing this option. Instead we need to have an entry like this (note the 
quotes for the option) –

options : {dns_server="{8.8.8.8, 9.9.9.9}", 
domain_name="\"dom.com\"", lease_time="43200", mtu="1442", router="50.0.0.1", 
server_id="50.0.0.1", server_mac="aa:bb:cc:dd:00:11", tftp_server="\"4.4.4.4\""}



Another alternative might be to support both types for this option. For this, 
we can return a list from “gen_opts_find()” function and enhance the parser 
code to support multiple types.



Are there any guidelines or suggestions to make a schema change such as this?

Thanks,
Dhathri
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Hella

2020-01-08 Thread Mr.Robby
Der Betrag von 2.5000.000,00 wurde Ihnen von Frau Maureen David gespendet. 
Bitte kontaktieren Sie sie per E-Mail: makaltschm...@gmail.com 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] AUTHORS: update email for Lance Richardson

2020-01-08 Thread Ben Pfaff
On Wed, Jan 08, 2020 at 02:51:09PM -0500, Lance Richardson via dev wrote:
> Update email address for Lance Richardson.
> 
> Signed-off-by: Lance Richardson 

Thanks, Lance.  I applied this to master.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v10 1/1] Avoid dp_hash recirculation for balance-tcp bond selection mode

2020-01-08 Thread Ben Pfaff
On Wed, Jan 08, 2020 at 10:48:04AM +0530, Vishal Deep Ajmera wrote:
> Problem:
> 
> In OVS-DPDK, flows with output over a bond interface of type “balance-tcp”
> (using a hash on TCP/UDP 5-tuple) get translated by the ofproto layer into
> "HASH" and "RECIRC" datapath actions. After recirculation, the packet is
> forwarded to the bond member port based on 8-bits of the datapath hash
> value computed through dp_hash. This causes performance degradation in the
> following ways:

Thanks for the patch!

I have a few minor stylistic suggestions, see below.

I'd like to hear Ilya's opinion on this.

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 78f9cf928de2..c3a54e96346e 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -378,11 +378,12 @@ struct dp_netdev {
 
 struct conntrack *conntrack;
 struct pmd_auto_lb pmd_alb;
+
 /* Bonds.
  *
  * Any lookup into 'bonds' requires taking 'bond_mutex'. */
 struct ovs_mutex bond_mutex;
-struct hmap bonds;
+struct hmap bonds;  /* Contains "struct tx_bond"s. */
 };
 
 static void meter_lock(const struct dp_netdev *dp, uint32_t meter_id)
@@ -5581,10 +5582,7 @@ pmd_free_cached_bonds(struct dp_netdev_pmd_thread *pmd)
 
 /* Remove bonds from pmd which no longer exists. */
 HMAP_FOR_EACH_SAFE (bond, next, node, >bond_cache) {
-struct tx_bond *tx = NULL;
-
-tx = tx_bond_lookup(>tx_bonds, bond->bond_id);
-if (!tx) {
+if (!tx_bond_lookup(>tx_bonds, bond->bond_id)) {
 /* Bond no longer exist. Delete it from pmd. */
 hmap_remove(>bond_cache, >node);
 free(bond);
@@ -5601,10 +5599,11 @@ pmd_load_cached_bonds(struct dp_netdev_pmd_thread *pmd)
 pmd_free_cached_bonds(pmd);
 hmap_shrink(>bond_cache);
 
-struct tx_bond *tx_bond, *tx_bond_cached;
+struct tx_bond *tx_bond;
 HMAP_FOR_EACH (tx_bond, node, >tx_bonds) {
 /* Check if bond already exist on pmd. */
-tx_bond_cached = tx_bond_lookup(>bond_cache, tx_bond->bond_id);
+struct tx_bond *tx_bond_cached
+= tx_bond_lookup(>bond_cache, tx_bond->bond_id);
 
 if (!tx_bond_cached) {
 /* Create new bond entry in cache. */
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 2/2] ovn: Add MLD support.

2020-01-08 Thread Dumitru Ceara
Extend the existing infrastructure used for IPv4 multicast to
IPv6 multicast:
- snoop MLDv1 & MLDv2 reports.
- if multicast querier is configured, generate MLDv2 queries.
- support IPv6 multicast relay.
- support static flood configuration for IPv6 multicast too.

Signed-off-by: Dumitru Ceara 
---
 NEWS|1 
 controller/pinctrl.c|  359 +++--
 lib/logical-fields.c|   33 +++
 lib/ovn-l7.h|   97 
 northd/ovn-northd.8.xml |   22 ++
 northd/ovn-northd.c |  103 ++--
 ovn-nb.xml  |4 
 ovn-sb.ovsschema|5 
 ovn-sb.xml  |5 
 tests/ovn.at|  579 +++
 10 files changed, 1099 insertions(+), 109 deletions(-)

diff --git a/NEWS b/NEWS
index 0ad9677..9243d57 100644
--- a/NEWS
+++ b/NEWS
@@ -4,6 +4,7 @@ Post-OVS-v2.12.0
  independently.
- Added IPv6 NAT support for OVN routers.
- Added Stateless Floating IP support in OVN.
+   - Added support for MLD Snooping and MLD Querier.
 
 v2.12.0 - 03 Sep 2019
 -
diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index c4752c6..cc222a7 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -263,7 +263,7 @@ static void ip_mcast_sync(
 struct ovsdb_idl_index *sbrec_igmp_groups,
 struct ovsdb_idl_index *sbrec_ip_multicast)
 OVS_REQUIRES(pinctrl_mutex);
-static void pinctrl_ip_mcast_handle_igmp(
+static void pinctrl_ip_mcast_handle(
 struct rconn *swconn,
 const struct flow *ip_flow,
 struct dp_packet *pkt_in,
@@ -1908,8 +1908,8 @@ process_packet_in(struct rconn *swconn, const struct 
ofp_header *msg)
);
 break;
 case ACTION_OPCODE_IGMP:
-pinctrl_ip_mcast_handle_igmp(swconn, , ,
- _metadata, );
+pinctrl_ip_mcast_handle(swconn, , , _metadata,
+);
 break;
 
 case ACTION_OPCODE_PUT_ARP:
@@ -3198,32 +3198,55 @@ pinctrl_compose_ipv4(struct dp_packet *packet, struct 
eth_addr eth_src,
 packet->packet_type = htonl(PT_ETH);
 
 struct eth_header *eh = dp_packet_put_zeros(packet, sizeof *eh);
-eh->eth_dst = eth_dst;
-eh->eth_src = eth_src;
-
 struct ip_header *nh = dp_packet_put_zeros(packet, sizeof *nh);
 
+eh->eth_dst = eth_dst;
+eh->eth_src = eth_src;
 eh->eth_type = htons(ETH_TYPE_IP);
 dp_packet_set_l3(packet, nh);
 nh->ip_ihl_ver = IP_IHL_VER(5, 4);
-nh->ip_tot_len = htons(sizeof(struct ip_header) + ip_payload_len);
+nh->ip_tot_len = htons(sizeof *nh + ip_payload_len);
 nh->ip_tos = IP_DSCP_CS6;
 nh->ip_proto = ip_proto;
 nh->ip_frag_off = htons(IP_DF);
 
-/* Setting tos and ttl to 0 and 1 respectively. */
 packet_set_ipv4(packet, ipv4_src, ipv4_dst, 0, ttl);
 
 nh->ip_csum = 0;
 nh->ip_csum = csum(nh, sizeof *nh);
 }
 
+static void
+pinctrl_compose_ipv6(struct dp_packet *packet, struct eth_addr eth_src,
+ struct eth_addr eth_dst, struct in6_addr *ipv6_src,
+ struct in6_addr *ipv6_dst, uint8_t ip_proto, uint8_t ttl,
+ uint16_t ip_payload_len)
+{
+dp_packet_clear(packet);
+packet->packet_type = htonl(PT_ETH);
+
+struct eth_header *eh = dp_packet_put_zeros(packet, sizeof *eh);
+struct ip6_hdr *nh = dp_packet_put_zeros(packet, sizeof *nh);
+
+eh->eth_dst = eth_dst;
+eh->eth_src = eth_src;
+eh->eth_type = htons(ETH_TYPE_IPV6);
+dp_packet_set_l3(packet, nh);
+
+nh->ip6_vfc = 0x60;
+nh->ip6_nxt = ip_proto;
+nh->ip6_plen = htons(ip_payload_len);
+
+packet_set_ipv6(packet, ipv6_src, ipv6_dst, 0, 0, ttl);
+}
+
 /*
  * Multicast snooping configuration.
  */
 struct ip_mcast_snoop_cfg {
 bool enabled;
-bool querier_enabled;
+bool querier_v4_enabled;
+bool querier_v6_enabled;
 
 uint32_t table_size;   /* Max number of allowed multicast groups. */
 uint32_t idle_time_s;  /* Idle timeout for multicast groups. */
@@ -3231,10 +3254,19 @@ struct ip_mcast_snoop_cfg {
 uint32_t query_max_resp_s; /* Multicast query max-response field. */
 uint32_t seq_no;   /* Used for flushing learnt groups. */
 
-struct eth_addr query_eth_src; /* Src ETH address used for queries. */
-struct eth_addr query_eth_dst; /* Dst ETH address used for queries. */
-ovs_be32 query_ipv4_src;   /* Src IPv4 address used for queries. */
-ovs_be32 query_ipv4_dst;   /* Dsc IPv4 address used for queries. */
+struct eth_addr query_eth_src;/* Src ETH address used for queries. */
+struct eth_addr query_eth_v4_dst; /* Dst ETH address used for IGMP
+   * queries.
+   */
+struct eth_addr query_eth_v6_dst; /* Dst ETH address used for MLD
+   * queries.
+   */
+

[ovs-dev] [PATCH ovn 1/2] ovn-northd: Fix ipv4.mcast logical field.

2020-01-08 Thread Dumitru Ceara
Signed-off-by: Dumitru Ceara 
---
 lib/logical-fields.c |3 ++-
 northd/ovn-northd.c  |2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/lib/logical-fields.c b/lib/logical-fields.c
index 8fb591c..5748b67 100644
--- a/lib/logical-fields.c
+++ b/lib/logical-fields.c
@@ -158,7 +158,8 @@ ovn_init_symtab(struct shash *symtab)
 expr_symtab_add_field(symtab, "ip4.dst", MFF_IPV4_DST, "ip4", false);
 expr_symtab_add_predicate(symtab, "ip4.src_mcast",
   "ip4.src[28..31] == 0xe");
-expr_symtab_add_predicate(symtab, "ip4.mcast", "ip4.dst[28..31] == 0xe");
+expr_symtab_add_predicate(symtab, "ip4.mcast",
+  "eth.mcast && ip4.dst[28..31] == 0xe");
 
 expr_symtab_add_predicate(symtab, "icmp4", "ip4 && ip.proto == 1");
 expr_symtab_add_field(symtab, "icmp4.type", MFF_ICMPV4_TYPE, "icmp4",
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index d91a008..dd7fdcc 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -6229,7 +6229,7 @@ build_lswitch_flows(struct hmap *datapaths, struct hmap 
*ports,
  * ports - RFC 4541, section 2.1.2, item 2.
  */
 ovn_lflow_add(lflows, od, S_SWITCH_IN_L2_LKUP, 85,
-  "ip4 && ip4.dst == 224.0.0.0/24",
+  "ip4.mcast && ip4.dst == 224.0.0.0/24",
   "outport = \""MC_FLOOD"\"; output;");
 
 /* Forward uregistered IP multicast to routers with relay enabled

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 0/2] Add MLD support.

2020-01-08 Thread Dumitru Ceara
The first patch of the series is a minor fix of how IP multicast traffic
is matched.

The second patch extends the already existing IPv4 Multicast support
(IGMP snooping, IGMP querier, relay and static flood config) to IPv6
by implementing MLDv1 & MLDv2 snooping and querier.

Signed-off-by: Dumitru Ceara 

Dumitru Ceara (2):
  ovn-northd: Fix ipv4.mcast logical field.
  ovn: Add MLD support.


 NEWS|1 
 controller/pinctrl.c|  359 +++--
 lib/logical-fields.c|   36 +++
 lib/ovn-l7.h|   97 
 northd/ovn-northd.8.xml |   22 ++
 northd/ovn-northd.c |  105 ++---
 ovn-nb.xml  |4 
 ovn-sb.ovsschema|5 
 ovn-sb.xml  |5 
 tests/ovn.at|  579 +++
 10 files changed, 1102 insertions(+), 111 deletions(-)


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] AUTHORS: update email for Lance Richardson

2020-01-08 Thread Lance Richardson via dev
Update email address for Lance Richardson.

Signed-off-by: Lance Richardson 
---
 .mailmap| 1 +
 AUTHORS.rst | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/.mailmap b/.mailmap
index 4688ab0cd..894062d48 100644
--- a/.mailmap
+++ b/.mailmap
@@ -53,6 +53,7 @@ Joe Stringer  
 Justin Pettit  
 Kmindg 
 Kyle Mestery  
+Lance Richardson  
 Mauricio Vasquez  

 Miguel Angel Ajo  
 Neil McKee 
diff --git a/AUTHORS.rst b/AUTHORS.rst
index 72c8307d5..a93b94354 100644
--- a/AUTHORS.rst
+++ b/AUTHORS.rst
@@ -221,7 +221,7 @@ Kyle Mestery   mest...@mestery.com
 Kyle Simpson   kyleandrew.simp...@gmail.com
 Kyle Upton kup...@baymicrosystems.com
 Lance Yang lance.y...@arm.com
-Lance Richardson   lrich...@redhat.com
+Lance Richardson   lance.richard...@broadcom.com
 Lars Kellogg-Stedman   l...@redhat.com
 Lei Huang  huang.f@gmail.com
 Leif Madsenlmad...@redhat.com
-- 
2.17.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V6 00/18] netdev datapath actions offload

2020-01-08 Thread Eli Britstein


On 1/8/2020 7:54 PM, Hemal Shah wrote:
Ilya and Eli,

I have some additional comments on the v6 series.

  1.  The description says that tunnel push action is supported by this series 
but v3-v4 note says that - dropped clone commit - will be posted in the next 
series. In that case, should the description of patcheset be updated to not 
include generic tunnel push action?

Right. Sorry. I'll remove this description in v7.

  1.  Can you outline the procedure for testing output action for 
VF-representors? How will this patchset be used in conjunction with 
VF-representors? It is not clear from the description.

Yes. You should attach the representors as OVS ports, and set hw-offload=true. 
VFs can be used by VMs, for example. When a packet is placed on a VF (for 
example by a VM), it is received by OVS by the representor, according to the 
default miss rule in HW. OVS configure flows for its ports (i.e. representors), 
and those are configured to the HW by rte_flow (in this series). Next packets 
in those flows are handled by HW, and not received by the representors, so 
transparent to OVS. Still, OVS query the flow statistics, in order to keep the 
flow alive as long as there is traffic, and age it out after a timeout with no 
traffic.
Hemal

On Tue, Jan 7, 2020 at 10:47 AM Ilya Maximets 
mailto:i.maxim...@ovn.org>> wrote:
On 19.12.2019 12:54, Eli Britstein wrote:
> Currently, netdev datapath offload only accelerates the flow match
> sequence by associating a mark per flow. This series introduces the full
> offload of netdev datapath flows by having the HW also perform the flow
> actions.
>
> This series adds HW offload for output, drop, set MAC, set IPv4, set
> TCP/UDP ports and tunnel push actions using the DPDK rte_flow API.
>
> v1 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/614511472
> v2 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/619262148
> v3 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/622253870
> v4 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/625654160
> v5 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/626692202
> v6 Travis passed: 
> https://travis-ci.org/elibritstein/OVS/builds/627132194
>
> v1-v2:
> - fallback to mark/rss in case of any failure of action offload.
> - dp_layer changed to "in_hw" in case the flow is offloaded.
> - using netdev_ports_get instead of dp_netdev_lookup_port in stats reading.
> - using flow_dump_next API instead of a new API.
> - validity checks for last action of output and clone.
> - count action should be done for drop as well.
> - validity check for output action.
> - added doc in Documentation/howto/dpdk.rst.
> v2-v3:
> - removed refactoring for netdev-offload-dpdk-flow.c file.
> - dropped flush commits.
> - dbg prints rate-limited, and outside lock.
> - added validity check for output action - same flow_api handle for both 
> netdevs.
> - added a mutex to protect possible concurrent deletion/querying of a flow.
> - statistics are collected using flow_get API.
> - added more documentation in Documentation/howto/dpdk.rst.
> v3-v4:
> - debug prints moved to 

Re: [ovs-dev] [ovs-dev v2] dpif-netdev: Allow to set max capacity of flow on netdev

2020-01-08 Thread Ben Pfaff
On Wed, Jan 08, 2020 at 05:32:48PM +0800, xiangxia.m@gmail.com wrote:
> From: Tonghao Zhang 
> 
> For installing more than MAX_FLOWS (65536) flows to netdev datapath.
> Add the ovs-appctl subcommand "dpif-netdev/pmd-set-max-flow" which
> can change the flow number which netdev datapath support.
> 
> Signed-off-by: Tonghao Zhang 
> ---
> v2:
> * change int type to atomic_uint32_t
> * check max flow number is whether valid (0 < max-flow < UINT_MAX).

Thanks.  I suggest folding in the following mainly stylistic changes.

Also, please document the new commands in lib/dpif-netdev-unixctl.man
and mention them in NEWS.

Thanks,

Ben.

-8<--cut here-->8--

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 38f0c65ab2e4..71aee1f7203f 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1133,9 +1133,9 @@ dpif_netdev_set_max_flow(struct unixctl_conn *conn,
 {
 long long max_flow = atoll(argv[1]);
 
-if (max_flow <= 0 || max_flow >= UINT_MAX) {
+if (max_flow <= 0 || max_flow >= UINT32_MAX) {
 unixctl_command_reply_error(conn,
-"max-flow should: > 0 and < UINT_MAX\n");
+"max-flow should: > 0 and < UINT_MAX");
 return;
 }
 
@@ -1149,14 +1149,12 @@ dpif_netdev_show_max_flow(struct unixctl_conn *conn,
   const char *argv[] OVS_UNUSED,
   void *aux OVS_UNUSED)
 {
-struct ds reply = DS_EMPTY_INITIALIZER;
 uint32_t max_flow;
-
 atomic_read_relaxed(_max_flow, _flow);
 
-ds_put_format(,"%u\n", max_flow);
-unixctl_command_reply(conn, ds_cstr());
-ds_destroy();
+char *reply = xasprintf("%u", max_flow);
+unixctl_command_reply(conn, reply);
+free(reply);
 }
 
 static int
@@ -3390,7 +3388,6 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 struct dpif_flow_stats *stats)
 {
 struct dp_netdev_flow *netdev_flow;
-uint32_t max_flow;
 int error = 0;
 
 if (stats) {
@@ -3401,6 +3398,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
 if (!netdev_flow) {
 if (put->flags & DPIF_FP_CREATE) {
+uint32_t max_flow;
 atomic_read_relaxed(_max_flow, _flow);
 if (cmap_count(>flow_table) < max_flow) {
 dp_netdev_flow_add(pmd, match, ufid, put->actions,
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS 2.13 Soft Freeze date

2020-01-08 Thread Ben Pfaff
On Wed, Jan 08, 2020 at 05:33:59PM +, Stokes, Ian wrote:
> 
> 
> On 1/8/2020 11:09 AM, Stokes, Ian wrote:
> > Hi Ben,
> > 
> > just wanted to confirm if the soft freeze for OVS 2.13 is still
> > confirmed for January 15th? Just a had a few queries so thought it would
> > be best to ask.
> 
> Apologies, was just pointed out to me on the community call, that soft
> freeze was January 1st, I meant to refer to the feature freeze.

I haven't been paying good enough attention.  Apologies.

Let's set the feature freeze to January 17 to round off the week to
Friday.

Thanks,

Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.

2020-01-08 Thread Stokes, Ian



On 1/8/2020 5:56 PM, Ilya Maximets wrote:

On 08.01.2020 18:37, Stokes, Ian wrote:



On 1/8/2020 4:38 PM, Ilya Maximets wrote:

On 07.01.2020 11:51, Stokes, Ian wrote:



On 1/2/2020 12:27 PM, Ilya Maximets wrote:

On 20.12.2019 16:28, Emma Finn wrote:

Add an ovs-appctl command to iterate through the dpcls
and for each subtable output the miniflow bits for any
existing table.

$ ovs-appctl dpif-netdev/subtable-show
pmd thread numa_id 0
     dpcls port 2:
   subtable:
     unit_0: 2 (0x5)
     unit_1: 1 (0x1)
pmd thread numa_id 1
     dpcls port 3:
   subtable:
     unit_0: 2 (0x5)
     unit_1: 1 (0x1)

Signed-off-by: Emma Finn 

---


So, what about my suggestions and thoughts about alternative solutions
that I posted in reply to RFC?  It's still unclear why we need to disturb
the running datapath to get this information, especially if we could get
it "offline" from the flow dump.


Hi Ilya,

apologies I've only spotted this post now.

I guess to my mind there are a few reasons why this is being added as a new 
command and not a separate application to process/parse the flow dumps of a 
datapath.

(i) Ease of implementation, it seems straight forward enough to follow the 
existing commands structure such as dpif-netdev/pmd-rxq-show to implement this 
command. It had the required elements (required data structures etc.) so 
minimum plumbing was required to get access to that info and it will be 
familiar to any other developers who have already worked or will work in that 
area of the code in the future.

I agree this could be done offline without the need to lock the datapath, but 
from my understanding I don't think the intention here is to run this command 
at a frequent or high interval so I would think that the lock should not be an 
issue unless the command is being executed continually (again similar to 
pmd-rxq-show, it would be called when needed only).

The concern of adding a new separate application for parsing the dump flow as 
you suggested came down to it being another separate app within OVS to maintain 
as well as the work required to plumb or parse all required info.


It's not hard to parse.  Just a few function calls.  Most of the required
functionality exists in 'lib' code.

If you don't like a separate app or script, this information could be printed


I think it would be better to keep it in OVS rather than a separate app/script.


in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
Like this:
    
recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), 
dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)



This is interesting approach, but are we missing info such as the in ports, 
numa nodes etc? Maybe Harry can comment to this as it was his idea originally. 
It seems with either approach you will be missing some info depending on how 
you want to approach the debug usecase.


We're not missing core info since flow dumps are printed per-PMD:

flow-dump from the main thread:
<...>
flow-dump from the pmd thread on core X:
<...>

numa could be easily checked by by the core number and I'm not sure
if we really need this information here.

Flow dumps always contains 'in_port' fileld and the port name will
be printed instead of port number if you'll pass --names to the dump command.




'dp-extra-info' might be printed with verbosity enabled if dpif provided this
information.


I haven't looked at ovs-ofctl myself, not sure how much work would be needed to 
access this info or if you are back to parsing, again I think the idea here is 
we have this info available in the current approach as is why parse and break 
out these values?


Not ovs-ofctl - It dumps OF tables, not the datapath flow tables - but
'ovs-dpctl dump-flows' or 'ovs-appctl dpctl/dump-flows'.  dpctl and appctl
are using same code from lib/dpctl.c which is fairly simple.  I'm suggesting
to add this information to dpif_flow.attrs and dpif-netdev will fill this
information while sending dumped flow in dp_netdev_flow_to_dpif_flow().
Should not be hard.
It's not parsing and breaking out, we have a netdev_flow while dumping
and we only need to store a bit more information from it into dpif_flow.


Now I'm on the same page as you, apologies, previously thought you were 
referring to ofctl. This approach seems more appropriate.






Would it even be acceptable for 2.13 as well? it would be a new v1 and we're 
past the soft freeze.


Technically, soft freeze wasn't announced yet. =)
And we also could make it a v3.


+1 :D







I really feel that this information (miniflow bits) should be bonded with 
flow-dump
somehow just because it belongs there.


I guess we're looking at this from different points of view, don't shoot me but 
would it make sense to have both approaches? :-D


Not sure about that.  I'd like to not have both.



No worries, I understand what you meant better now so agree it will be 
fine in one place.


Best 

Re: [ovs-dev] [PATCH v4 ovn 0/2] Add IPv6 Prefix delegation (RFC3633)

2020-01-08 Thread Lorenzo Bianconi
> On Fri, Dec 20, 2019 at 5:02 PM Lorenzo Bianconi
>  wrote:
> >
> > Introduce IPv6 Prefix delegation state machine according to RFC 3633
> > https://tools.ietf.org/html/rfc3633.
> > Add handle_dhcpv6_reply controller action to parse advertise/reply from
> > IPv6 delegation server.
> > Introduce logical flows in ovn router pipeline in order to parse dhcpv6
> > advertise/reply from IPv6 prefix delegation router.
> > This series relies on the following OVS commit:
> > https://github.com/openvswitch/ovs/commit/cec89046f72cb044b068ba6a4e30dbcc4292c4c1
> >
> 
> Hi Lorenzo,
> 

Hi Numan,

thx for reviewing and testing the series :)

> I tested this patch series. And I don't think it is working as expected.
> 
> My test setup has the below ovn resources
> 
> **
> witch 6f57f575-1dc0-463e-8c04-7bcee6697cea (public)
> port public-lr0
> type: router
> router-port: lr0-public
> port ln-public
> type: localnet
> addresses: ["unknown"]
> switch 1fa5de75-fcae-49a8-9eca-55ed274cc100 (sw0)
> port sw0-port1
> addresses: ["50:54:00:00:00:03 10.0.0.3"]
> port sw0-lr0
> type: router
> router-port: lr0-sw0
> switch 73af6b0e-8156-446e-8419-83e5ecc323ee (sw1)
> port sw1-lr0
> type: router
> router-port: lr0-sw1
> port sw1-port1
> addresses: ["40:54:00:00:00:03 20.0.0.3"]
> router 87c8d985-3a42-423b-8ed0-d4a5b4cfb5ac (lr0)
> port lr0-public
> mac: "00:00:20:20:12:13"
> networks: ["172.16.0.100/24", "2001:db8:::1/64"]
> gateway chassis: [ovn-gw-1]
> port lr0-sw1
> mac: "00:00:00:00:ff:02"
> networks: ["20.0.0.1/24"]
> port lr0-sw0
> mac: "00:00:00:00:ff:01"
> networks: ["10.0.0.1/24"]
> **
> 
> When I enabled prefix delegation on lr-public, lr0-sw1 and lr0-sw0,
> ovn-controller (on ovn-gw-1) didn't
> send the IPv6 PD messages.  I had to add an IPv6 address on lr0-public
> router port for ovn-controller
> to start sending IPv6 PD  messages.
> 
> ***
> ovn-nbctl add logical_router_port lr0-public networks 
> "2001\:db8\:\:\:1/64"
> ***
> 
> I think that should not be the case. If prefix_delegation=true is set
> on a lrp, then ovn-controller should use the
> IPv6 link local address (which is derived from the mac) instead of
> expecting CMS to configure an IPv6 address.
> 
> In my case, the logical router port - lr0-sw0 never received any IPv6
> prefix. lr0-sw1 did receive.

ack, I will fix it in v5

> 
> In my testing, ovn-controller crashed with the below trace when I ran
> the above "ovn-nbctl add logical_router_port ..." command
> 
> ***
> (gdb) bt
> #0  0x004b7875 in skiplist_get_data (node=node@entry=0x786966)
> at ../lib/skiplist.c:212
> #1  0x004aa457 in ovsdb_idl_cursor_next_eq
> (cursor=cursor@entry=0x7ffe28971090) at ../lib/ovsdb-idl.c:2982
> #2  0x0041f2fd in prepare_ipv6_prefixd
> (active_tunnels=0x1f3dc60, chassis=0x1f9f9d0,
> local_datapaths=0x1f3dc00,
> sbrec_port_binding_by_name=0x1f12cd0,
> sbrec_port_binding_by_datapath=,
> ovnsb_idl_txn=0x2036cb0)
> at ../controller/pinctrl.c:3257
> #3  pinctrl_run (ovnsb_idl_txn=ovnsb_idl_txn@entry=0x2036cb0,
> 
> sbrec_datapath_binding_by_key=sbrec_datapath_binding_by_key@entry=0x1f3f940,
> 
> sbrec_port_binding_by_datapath=sbrec_port_binding_by_datapath@entry=0x1f134b0,
> sbrec_port_binding_by_key=sbrec_port_binding_by_key@entry=0x1f12e80,
> sbrec_port_binding_by_name=sbrec_port_binding_by_name@entry=0x1f12cd0,
> 
> sbrec_mac_binding_by_lport_ip=sbrec_mac_binding_by_lport_ip@entry=0x1f3faf0,
> sbrec_igmp_groups=0x1f3d060,
> sbrec_ip_multicast_opts=0x1f3fc80, dns_table=0x1f48f50,
> ce_table=0x1f48f50, svc_mon_table=0x1f48f50,
> br_int=, chassis=0x1f9f9d0,
> local_datapaths=0x1f3dc00, active_tunnels=0x1f3dc60)
> at ../controller/pinctrl.c:2510
> #4  0x00408536 in main (argc=, argv= out>) at ../controller/ovn-controller.c:2136
> 

ack, I will fix it in v5

> 
> This patch series stores the received PD in the
> port_binding.options:ipv6_ra_pd_list column.
> CMS will not come to know about the configured PD. Ideally all this
> should be transparent to CMS.
> When ovn-controller stores the received PD in the
> port_binding.options, ovn-northd should read this
> value and store it in logical_Router_port addresses column (or
> probably a new column) to indicate CMS
> that prefix is configured for this router port.

ack, I will add it in v5

> 
> And if CMS has enabled, router advertisement for a  router port,
> ovn-controller(s) should start sending RAs
> for the ports which belong to the logical switches.
> 
> As suggested earlier, please enhance system-ovn test case to add
> another logical router port to
> the router R1 and make sure that all the logical router ports get
> separate prefixes.

I guess I have already done it but probably the name convention I used is a
little bit messy. I will improve it in v5.

Regards,

Re: [ovs-dev] [PATCH V6 09/18] dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"

2020-01-08 Thread Ilya Maximets
On 08.01.2020 18:25, Eli Britstein wrote:
> 
> On 1/8/2020 2:17 PM, Ilya Maximets wrote:
>> On 19.12.2019 12:54, Eli Britstein wrote:
>>> Flows that are offloaded via DPDK can be partially offloaded (matches
>>> only) or fully offloaded (matches and actions). Set partially offloaded
>>> display to (offloaded=partial, dp:ovs), and fully offloaded to
>>> (offloaded=yes, dp:dpdk). Also support filter types "dpdk" and
>>> "partially-offloaded".
>>>
>>> Signed-off-by: Eli Britstein 
>>> ---
>>>   NEWS  |  3 ++
>>>   lib/dpctl.c   | 97 
>>> ---
>>>   lib/dpctl.man |  2 ++
>>>   3 files changed, 78 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/NEWS b/NEWS
>>> index 2acde3fe8..85c4a4812 100644
>>> --- a/NEWS
>>> +++ b/NEWS
>>> @@ -26,6 +26,9 @@ Post-v2.12.0
>>>* DPDK ring ports (dpdkr) are deprecated and will be removed in next
>>>  releases.
>>>* Add support for DPDK 19.11.
>>> +   - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
>>> + partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
>>> + type filter supports new filters: "dpdk" and "partially-offloaded".
>>>   
>>>   v2.12.0 - 03 Sep 2019
>>>   -
>>> diff --git a/lib/dpctl.c b/lib/dpctl.c
>>> index 0ee64718c..387f61ed4 100644
>>> --- a/lib/dpctl.c
>>> +++ b/lib/dpctl.c
>>> @@ -818,7 +818,11 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow 
>>> *f, struct hmap *ports,
>>>   
>>>   dpif_flow_stats_format(>stats, ds);
>>>   if (dpctl_p->verbosity && f->attrs.offloaded) {
>>> -ds_put_cstr(ds, ", offloaded:yes");
>>> +if (f->attrs.dp_layer && !strcmp(f->attrs.dp_layer, "ovs")) {
>>> +ds_put_cstr(ds, ", offloaded:partial");
>>> +} else {
>>> +ds_put_cstr(ds, ", offloaded:yes");
>>> +}
>>>   }
>>>   if (dpctl_p->verbosity && f->attrs.dp_layer) {
>>>   ds_put_format(ds, ", dp:%s", f->attrs.dp_layer);
>>> @@ -827,20 +831,30 @@ format_dpif_flow(struct ds *ds, const struct 
>>> dpif_flow *f, struct hmap *ports,
>>>   format_odp_actions(ds, f->actions, f->actions_len, ports);
>>>   }
>>>   
>>> +enum dp_type {
>>> +DP_TYPE_ANY,
>>> +DP_TYPE_OVS,
>>> +DP_TYPE_TC,
>>> +DP_TYPE_DPDK,
>>> +};
>>> +
>>> +enum ol_type {
>>> +OL_TYPE_ANY,
>>> +OL_TYPE_NO,
>>> +OL_TYPE_YES,
>>> +OL_TYPE_PARTIAL,
>>> +};
>>> +
>>>   struct dump_types {
>>> -bool ovs;
>>> -bool tc;
>>> -bool offloaded;
>>> -bool non_offloaded;
>>> +enum dp_type dp_type;
>>> +enum ol_type ol_type;
>>>   };
>>>   
>>>   static void
>>>   enable_all_dump_types(struct dump_types *dump_types)
>>>   {
>>> -dump_types->ovs = true;
>>> -dump_types->tc = true;
>>> -dump_types->offloaded = true;
>>> -dump_types->non_offloaded = true;
>>> +dump_types->dp_type = DP_TYPE_ANY;
>>> +dump_types->ol_type = OL_TYPE_ANY;
>>>   }
>>>   
>>>   static int
>>> @@ -862,13 +876,17 @@ populate_dump_types(char *types_list, struct 
>>> dump_types *dump_types,
>>>   current_type[type_len] = '\0';
>>>   
>>>   if (!strcmp(current_type, "ovs")) {
>>> -dump_types->ovs = true;
>>> +dump_types->dp_type = DP_TYPE_OVS;
>>>   } else if (!strcmp(current_type, "tc")) {
>>> -dump_types->tc = true;
>>> +dump_types->dp_type = DP_TYPE_TC;
>>> +} else if (!strcmp(current_type, "dpdk")) {
>>> +dump_types->dp_type = DP_TYPE_DPDK;
>>>   } else if (!strcmp(current_type, "offloaded")) {
>>> -dump_types->offloaded = true;
>>> +dump_types->ol_type = OL_TYPE_YES;
>>>   } else if (!strcmp(current_type, "non-offloaded")) {
>>> -dump_types->non_offloaded = true;
>>> +dump_types->ol_type = OL_TYPE_NO;
>>> +} else if (!strcmp(current_type, "partially-offloaded")) {
>>> +dump_types->ol_type = OL_TYPE_PARTIAL;
>>>   } else if (!strcmp(current_type, "all")) {
>>>   enable_all_dump_types(dump_types);
>>>   } else {
>>> @@ -884,30 +902,61 @@ static void
>>>   determine_dpif_flow_dump_types(struct dump_types *dump_types,
>>>  struct dpif_flow_dump_types 
>>> *dpif_dump_types)
>>>   {
>>> -dpif_dump_types->ovs_flows = dump_types->ovs || 
>>> dump_types->non_offloaded;
>>> -dpif_dump_types->netdev_flows = dump_types->tc || dump_types->offloaded
>>> -|| dump_types->non_offloaded;
>>> +dpif_dump_types->ovs_flows = dump_types->dp_type == DP_TYPE_OVS;
>>> +dpif_dump_types->netdev_flows = (dump_types->dp_type == DP_TYPE_TC ||
>>> + dump_types->dp_type == DP_TYPE_DPDK);
>>>   }
>> Above code doesn't handle DP_TYPE_ANY.  This mostly breaks dump-flows
>> command if no type specified.
> Correct. I already fixed it today. That was the first 

Re: [ovs-dev] [PATCH V6 00/18] netdev datapath actions offload

2020-01-08 Thread Hemal Shah via dev
Ilya and Eli,

I have some additional comments on the v6 series.

   1. The description says that tunnel push action is supported by this
   series but v3-v4 note says that - dropped clone commit - will be posted in
   the next series. In that case, should the description of patcheset be
   updated to not include generic tunnel push action?
   2. Can you outline the procedure for testing output action for
   VF-representors? How will this patchset be used in conjunction with
   VF-representors? It is not clear from the description.

Hemal

On Tue, Jan 7, 2020 at 10:47 AM Ilya Maximets  wrote:

> On 19.12.2019 12:54, Eli Britstein wrote:
> > Currently, netdev datapath offload only accelerates the flow match
> > sequence by associating a mark per flow. This series introduces the full
> > offload of netdev datapath flows by having the HW also perform the flow
> > actions.
> >
> > This series adds HW offload for output, drop, set MAC, set IPv4, set
> > TCP/UDP ports and tunnel push actions using the DPDK rte_flow API.
> >
> > v1 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/614511472
> > v2 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/619262148
> > v3 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/622253870
> > v4 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/625654160
> > v5 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/626692202
> > v6 Travis passed:
> https://travis-ci.org/elibritstein/OVS/builds/627132194
> >
> > v1-v2:
> > - fallback to mark/rss in case of any failure of action offload.
> > - dp_layer changed to "in_hw" in case the flow is offloaded.
> > - using netdev_ports_get instead of dp_netdev_lookup_port in stats
> reading.
> > - using flow_dump_next API instead of a new API.
> > - validity checks for last action of output and clone.
> > - count action should be done for drop as well.
> > - validity check for output action.
> > - added doc in Documentation/howto/dpdk.rst.
> > v2-v3:
> > - removed refactoring for netdev-offload-dpdk-flow.c file.
> > - dropped flush commits.
> > - dbg prints rate-limited, and outside lock.
> > - added validity check for output action - same flow_api handle for both
> netdevs.
> > - added a mutex to protect possible concurrent deletion/querying of a
> flow.
> > - statistics are collected using flow_get API.
> > - added more documentation in Documentation/howto/dpdk.rst.
> > v3-v4:
> > - debug prints moved to netdev-offload-dpdk.c.
> > - flow get returns full stats, not diff.
> > - removed additional fields from offload_info and dp_netdev_flow.
> > - read HW stats during dpif_netdev_flow_get as well as
> >   dpif_netdev_flow_dump_next.
> > - moved actions offload framework just before support commit.
> > - fixed memory leak in rewrite code.
> > - dropped clone commit - will be posted in next series.
> > v4-v5:
> > - statistics collecting changed to populate dp_layer and offloaded
> >   attributes. dp_layer changed to "dpdk" for fully offloaded by dpdk.
> > - display offloaded:partial for partially offloaded flows.
> > - support filter types "dpdk" and "partially-offloaded" in
> >   dpctl/dump-flows type=X.
> > - simplify code readability of rewrite commits.
> > v5-v6:
> > - debug prints improvement for partial offload installations. dbg prints
> >   for failed actions, warn for other failure.
> > - fixed memory leak in case retrieved dpdk port-id is invalid.
> >
> > Eli Britstein (17):
> >   netdev-offload-dpdk: Refactor flow patterns
> >   netdev-offload-dpdk: Refactor action items freeing scheme
> >   netdev-offload-dpdk: Dynamically allocate pattern items
> >   netdev-offload-dpdk: Improve HW offload flow debuggability
> >   netdev-dpdk: Introduce rte flow query function
> >   netdev-offload-dpdk: Return UFID-rte_flow entry in find method
> >   netdev-offload-dpdk: Framework for actions offload
> >   netdev-offload-dpdk: Implement flow get method
> >   dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"
> >   dpif-netdev: Populate dpif class field in offload struct
> >   netdev-dpdk: Getter function for dpdk port id API
> >   netdev-offload: Introduce a function to validate same flow api handle
> >   netdev-offload-dpdk: Support offload of output action
> >   netdev-offload-dpdk: Support offload of drop action
> >   netdev-offload-dpdk: Support offload of set MAC actions
> >   netdev-offload-dpdk: Support offload of set IPv4 actions
> >   netdev-offload-dpdk: Support offload of set TCP/UDP ports actions
> >
> > Ophir Munk (1):
> >   dpif-netdev: Update offloaded flows statistics
> >
> >  Documentation/howto/dpdk.rst  |  21 +-
> >  NEWS  |   5 +
> >  lib/dpctl.c   |  97 +++--
> >  lib/dpctl.man |   2 +
> >  lib/dpif-netdev.c |  81 +++-
> >  lib/netdev-dpdk.c |  48 +++
> >  lib/netdev-dpdk.h |   8 +
> >  lib/netdev-offload-dpdk.c | 964
> 

Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.

2020-01-08 Thread Ilya Maximets
On 08.01.2020 18:37, Stokes, Ian wrote:
> 
> 
> On 1/8/2020 4:38 PM, Ilya Maximets wrote:
>> On 07.01.2020 11:51, Stokes, Ian wrote:
>>>
>>>
>>> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
 On 20.12.2019 16:28, Emma Finn wrote:
> Add an ovs-appctl command to iterate through the dpcls
> and for each subtable output the miniflow bits for any
> existing table.
>
> $ ovs-appctl dpif-netdev/subtable-show
> pmd thread numa_id 0
>     dpcls port 2:
>   subtable:
>     unit_0: 2 (0x5)
>     unit_1: 1 (0x1)
> pmd thread numa_id 1
>     dpcls port 3:
>   subtable:
>     unit_0: 2 (0x5)
>     unit_1: 1 (0x1)
>
> Signed-off-by: Emma Finn 
>
> ---

 So, what about my suggestions and thoughts about alternative solutions
 that I posted in reply to RFC?  It's still unclear why we need to disturb
 the running datapath to get this information, especially if we could get
 it "offline" from the flow dump.
>>>
>>> Hi Ilya,
>>>
>>> apologies I've only spotted this post now.
>>>
>>> I guess to my mind there are a few reasons why this is being added as a new 
>>> command and not a separate application to process/parse the flow dumps of a 
>>> datapath.
>>>
>>> (i) Ease of implementation, it seems straight forward enough to follow the 
>>> existing commands structure such as dpif-netdev/pmd-rxq-show to implement 
>>> this command. It had the required elements (required data structures etc.) 
>>> so minimum plumbing was required to get access to that info and it will be 
>>> familiar to any other developers who have already worked or will work in 
>>> that area of the code in the future.
>>>
>>> I agree this could be done offline without the need to lock the datapath, 
>>> but from my understanding I don't think the intention here is to run this 
>>> command at a frequent or high interval so I would think that the lock 
>>> should not be an issue unless the command is being executed continually 
>>> (again similar to pmd-rxq-show, it would be called when needed only).
>>>
>>> The concern of adding a new separate application for parsing the dump flow 
>>> as you suggested came down to it being another separate app within OVS to 
>>> maintain as well as the work required to plumb or parse all required info.
>>
>> It's not hard to parse.  Just a few function calls.  Most of the required
>> functionality exists in 'lib' code.
>>
>> If you don't like a separate app or script, this information could be printed
> 
> I think it would be better to keep it in OVS rather than a separate 
> app/script.
> 
>> in a flow-dump in a some special field named like 'dp-extra-info' or 
>> whatever.
>> Like this:
>>    
>> recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no),
>>  dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)
>>
> 
> This is interesting approach, but are we missing info such as the in ports, 
> numa nodes etc? Maybe Harry can comment to this as it was his idea 
> originally. It seems with either approach you will be missing some info 
> depending on how you want to approach the debug usecase.

We're not missing core info since flow dumps are printed per-PMD:

flow-dump from the main thread:
<...>
flow-dump from the pmd thread on core X:
<...>

numa could be easily checked by by the core number and I'm not sure
if we really need this information here.

Flow dumps always contains 'in_port' fileld and the port name will
be printed instead of port number if you'll pass --names to the dump command.

> 
>> 'dp-extra-info' might be printed with verbosity enabled if dpif provided this
>> information.
> 
> I haven't looked at ovs-ofctl myself, not sure how much work would be needed 
> to access this info or if you are back to parsing, again I think the idea 
> here is we have this info available in the current approach as is why parse 
> and break out these values?

Not ovs-ofctl - It dumps OF tables, not the datapath flow tables - but
'ovs-dpctl dump-flows' or 'ovs-appctl dpctl/dump-flows'.  dpctl and appctl
are using same code from lib/dpctl.c which is fairly simple.  I'm suggesting
to add this information to dpif_flow.attrs and dpif-netdev will fill this
information while sending dumped flow in dp_netdev_flow_to_dpif_flow().
Should not be hard.
It's not parsing and breaking out, we have a netdev_flow while dumping
and we only need to store a bit more information from it into dpif_flow.

> 
> Would it even be acceptable for 2.13 as well? it would be a new v1 and we're 
> past the soft freeze.

Technically, soft freeze wasn't announced yet. =)
And we also could make it a v3.

> 
>>
>> I really feel that this information (miniflow bits) should be bonded with 
>> flow-dump
>> somehow just because it belongs there.
> 
> I guess we're looking at this from different points of view, don't shoot me 
> but would it make sense to have both approaches? :-D

Not sure about that.  

Re: [ovs-dev] [PATCH] compat: Include confirm_neigh parameter if needed

2020-01-08 Thread Gregory Rose


On 1/8/2020 12:57 AM, Simon Horman wrote:

On Tue, Jan 07, 2020 at 08:35:57AM -0800, Gregory Rose wrote:

On 1/7/2020 1:34 AM, Simon Horman wrote:

On Mon, Jan 06, 2020 at 01:36:34PM -0800, Greg Rose wrote:

...


--- a/datapath/linux/compat/ip6_gre.c
+++ b/datapath/linux/compat/ip6_gre.c
@@ -1089,7 +1089,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff 
*skb,
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
+#ifndef HAVE_DST_OPS_CONFIRM_NEIGH
dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
+#else
+   dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
+#endif

Did you consider using skb_dst_update_pmtu() unconditionally?
That may be cleaner going forwards.

Yes, but it defaults to true for the boolean parameter to confirm the
neighbor entry
and I didn't want to change behavior for older kernels.  If you think that's
not a concern
then I'd be happy to respin the patch using skb_dst_update_pmtu().

Thanks Greg,

after reviewing things another time I agree that what you have done is
correct.


Good deal, thanks Simon!

- Greg
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.

2020-01-08 Thread Stokes, Ian



On 1/8/2020 4:38 PM, Ilya Maximets wrote:

On 07.01.2020 11:51, Stokes, Ian wrote:



On 1/2/2020 12:27 PM, Ilya Maximets wrote:

On 20.12.2019 16:28, Emma Finn wrote:

Add an ovs-appctl command to iterate through the dpcls
and for each subtable output the miniflow bits for any
existing table.

$ ovs-appctl dpif-netdev/subtable-show
pmd thread numa_id 0
    dpcls port 2:
  subtable:
    unit_0: 2 (0x5)
    unit_1: 1 (0x1)
pmd thread numa_id 1
    dpcls port 3:
  subtable:
    unit_0: 2 (0x5)
    unit_1: 1 (0x1)

Signed-off-by: Emma Finn 

---


So, what about my suggestions and thoughts about alternative solutions
that I posted in reply to RFC?  It's still unclear why we need to disturb
the running datapath to get this information, especially if we could get
it "offline" from the flow dump.


Hi Ilya,

apologies I've only spotted this post now.

I guess to my mind there are a few reasons why this is being added as a new 
command and not a separate application to process/parse the flow dumps of a 
datapath.

(i) Ease of implementation, it seems straight forward enough to follow the 
existing commands structure such as dpif-netdev/pmd-rxq-show to implement this 
command. It had the required elements (required data structures etc.) so 
minimum plumbing was required to get access to that info and it will be 
familiar to any other developers who have already worked or will work in that 
area of the code in the future.

I agree this could be done offline without the need to lock the datapath, but 
from my understanding I don't think the intention here is to run this command 
at a frequent or high interval so I would think that the lock should not be an 
issue unless the command is being executed continually (again similar to 
pmd-rxq-show, it would be called when needed only).

The concern of adding a new separate application for parsing the dump flow as 
you suggested came down to it being another separate app within OVS to maintain 
as well as the work required to plumb or parse all required info.


It's not hard to parse.  Just a few function calls.  Most of the required
functionality exists in 'lib' code.

If you don't like a separate app or script, this information could be printed


I think it would be better to keep it in OVS rather than a separate 
app/script.



in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
Like this:
   
recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), 
dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)



This is interesting approach, but are we missing info such as the in 
ports, numa nodes etc? Maybe Harry can comment to this as it was his 
idea originally. It seems with either approach you will be missing some 
info depending on how you want to approach the debug usecase.



'dp-extra-info' might be printed with verbosity enabled if dpif provided this
information.


I haven't looked at ovs-ofctl myself, not sure how much work would be 
needed to access this info or if you are back to parsing, again I think 
the idea here is we have this info available in the current approach as 
is why parse and break out these values?


Would it even be acceptable for 2.13 as well? it would be a new v1 and 
we're past the soft freeze.




I really feel that this information (miniflow bits) should be bonded with 
flow-dump
somehow just because it belongs there.


I guess we're looking at this from different points of view, don't shoot 
me but would it make sense to have both approaches? :-D




After posting the RFC we had a number of users already applying the patch and 
using it in their deployments, we spoke about it at the conference and didn't 
hear any objections so I this is why the patch has continued with this approach 
for the 2.13 release.


They didn't have any alternatives to use. =)


Agree, but the choice provided worked well so it may not have been a 
problem :D.


Best Regards
Ian


Best regards, Ilya Maximets.


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] OVS 2.13 Soft Freeze date

2020-01-08 Thread Stokes, Ian




On 1/8/2020 11:09 AM, Stokes, Ian wrote:

Hi Ben,

just wanted to confirm if the soft freeze for OVS 2.13 is still 
confirmed for January 15th? Just a had a few queries so thought it would 
be best to ask.


Apologies, was just pointed out to me on the community call, that soft 
freeze was January 1st, I meant to refer to the feature freeze.


Regards
Ian


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V6 09/18] dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"

2020-01-08 Thread Eli Britstein


On 1/8/2020 2:17 PM, Ilya Maximets wrote:
> On 19.12.2019 12:54, Eli Britstein wrote:
>> Flows that are offloaded via DPDK can be partially offloaded (matches
>> only) or fully offloaded (matches and actions). Set partially offloaded
>> display to (offloaded=partial, dp:ovs), and fully offloaded to
>> (offloaded=yes, dp:dpdk). Also support filter types "dpdk" and
>> "partially-offloaded".
>>
>> Signed-off-by: Eli Britstein 
>> ---
>>   NEWS  |  3 ++
>>   lib/dpctl.c   | 97 
>> ---
>>   lib/dpctl.man |  2 ++
>>   3 files changed, 78 insertions(+), 24 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 2acde3fe8..85c4a4812 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -26,6 +26,9 @@ Post-v2.12.0
>>* DPDK ring ports (dpdkr) are deprecated and will be removed in next
>>  releases.
>>* Add support for DPDK 19.11.
>> +   - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
>> + partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
>> + type filter supports new filters: "dpdk" and "partially-offloaded".
>>   
>>   v2.12.0 - 03 Sep 2019
>>   -
>> diff --git a/lib/dpctl.c b/lib/dpctl.c
>> index 0ee64718c..387f61ed4 100644
>> --- a/lib/dpctl.c
>> +++ b/lib/dpctl.c
>> @@ -818,7 +818,11 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow 
>> *f, struct hmap *ports,
>>   
>>   dpif_flow_stats_format(>stats, ds);
>>   if (dpctl_p->verbosity && f->attrs.offloaded) {
>> -ds_put_cstr(ds, ", offloaded:yes");
>> +if (f->attrs.dp_layer && !strcmp(f->attrs.dp_layer, "ovs")) {
>> +ds_put_cstr(ds, ", offloaded:partial");
>> +} else {
>> +ds_put_cstr(ds, ", offloaded:yes");
>> +}
>>   }
>>   if (dpctl_p->verbosity && f->attrs.dp_layer) {
>>   ds_put_format(ds, ", dp:%s", f->attrs.dp_layer);
>> @@ -827,20 +831,30 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow 
>> *f, struct hmap *ports,
>>   format_odp_actions(ds, f->actions, f->actions_len, ports);
>>   }
>>   
>> +enum dp_type {
>> +DP_TYPE_ANY,
>> +DP_TYPE_OVS,
>> +DP_TYPE_TC,
>> +DP_TYPE_DPDK,
>> +};
>> +
>> +enum ol_type {
>> +OL_TYPE_ANY,
>> +OL_TYPE_NO,
>> +OL_TYPE_YES,
>> +OL_TYPE_PARTIAL,
>> +};
>> +
>>   struct dump_types {
>> -bool ovs;
>> -bool tc;
>> -bool offloaded;
>> -bool non_offloaded;
>> +enum dp_type dp_type;
>> +enum ol_type ol_type;
>>   };
>>   
>>   static void
>>   enable_all_dump_types(struct dump_types *dump_types)
>>   {
>> -dump_types->ovs = true;
>> -dump_types->tc = true;
>> -dump_types->offloaded = true;
>> -dump_types->non_offloaded = true;
>> +dump_types->dp_type = DP_TYPE_ANY;
>> +dump_types->ol_type = OL_TYPE_ANY;
>>   }
>>   
>>   static int
>> @@ -862,13 +876,17 @@ populate_dump_types(char *types_list, struct 
>> dump_types *dump_types,
>>   current_type[type_len] = '\0';
>>   
>>   if (!strcmp(current_type, "ovs")) {
>> -dump_types->ovs = true;
>> +dump_types->dp_type = DP_TYPE_OVS;
>>   } else if (!strcmp(current_type, "tc")) {
>> -dump_types->tc = true;
>> +dump_types->dp_type = DP_TYPE_TC;
>> +} else if (!strcmp(current_type, "dpdk")) {
>> +dump_types->dp_type = DP_TYPE_DPDK;
>>   } else if (!strcmp(current_type, "offloaded")) {
>> -dump_types->offloaded = true;
>> +dump_types->ol_type = OL_TYPE_YES;
>>   } else if (!strcmp(current_type, "non-offloaded")) {
>> -dump_types->non_offloaded = true;
>> +dump_types->ol_type = OL_TYPE_NO;
>> +} else if (!strcmp(current_type, "partially-offloaded")) {
>> +dump_types->ol_type = OL_TYPE_PARTIAL;
>>   } else if (!strcmp(current_type, "all")) {
>>   enable_all_dump_types(dump_types);
>>   } else {
>> @@ -884,30 +902,61 @@ static void
>>   determine_dpif_flow_dump_types(struct dump_types *dump_types,
>>  struct dpif_flow_dump_types 
>> *dpif_dump_types)
>>   {
>> -dpif_dump_types->ovs_flows = dump_types->ovs || 
>> dump_types->non_offloaded;
>> -dpif_dump_types->netdev_flows = dump_types->tc || dump_types->offloaded
>> -|| dump_types->non_offloaded;
>> +dpif_dump_types->ovs_flows = dump_types->dp_type == DP_TYPE_OVS;
>> +dpif_dump_types->netdev_flows = (dump_types->dp_type == DP_TYPE_TC ||
>> + dump_types->dp_type == DP_TYPE_DPDK);
>>   }
> Above code doesn't handle DP_TYPE_ANY.  This mostly breaks dump-flows
> command if no type specified.
Correct. I already fixed it today. That was the first issue with make 
check-offloads.
>
> Some more issues:  I think this patch will not handle multiple flow
> types correctly.  For example, "dump-flows --type=tc,dpdk" should
> 

Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.

2020-01-08 Thread Ilya Maximets
On 07.01.2020 11:51, Stokes, Ian wrote:
> 
> 
> On 1/2/2020 12:27 PM, Ilya Maximets wrote:
>> On 20.12.2019 16:28, Emma Finn wrote:
>>> Add an ovs-appctl command to iterate through the dpcls
>>> and for each subtable output the miniflow bits for any
>>> existing table.
>>>
>>> $ ovs-appctl dpif-netdev/subtable-show
>>> pmd thread numa_id 0
>>>    dpcls port 2:
>>>  subtable:
>>>    unit_0: 2 (0x5)
>>>    unit_1: 1 (0x1)
>>> pmd thread numa_id 1
>>>    dpcls port 3:
>>>  subtable:
>>>    unit_0: 2 (0x5)
>>>    unit_1: 1 (0x1)
>>>
>>> Signed-off-by: Emma Finn 
>>>
>>> ---
>>
>> So, what about my suggestions and thoughts about alternative solutions
>> that I posted in reply to RFC?  It's still unclear why we need to disturb
>> the running datapath to get this information, especially if we could get
>> it "offline" from the flow dump.
> 
> Hi Ilya,
> 
> apologies I've only spotted this post now.
> 
> I guess to my mind there are a few reasons why this is being added as a new 
> command and not a separate application to process/parse the flow dumps of a 
> datapath.
> 
> (i) Ease of implementation, it seems straight forward enough to follow the 
> existing commands structure such as dpif-netdev/pmd-rxq-show to implement 
> this command. It had the required elements (required data structures etc.) so 
> minimum plumbing was required to get access to that info and it will be 
> familiar to any other developers who have already worked or will work in that 
> area of the code in the future.
> 
> I agree this could be done offline without the need to lock the datapath, but 
> from my understanding I don't think the intention here is to run this command 
> at a frequent or high interval so I would think that the lock should not be 
> an issue unless the command is being executed continually (again similar to 
> pmd-rxq-show, it would be called when needed only).
> 
> The concern of adding a new separate application for parsing the dump flow as 
> you suggested came down to it being another separate app within OVS to 
> maintain as well as the work required to plumb or parse all required info.

It's not hard to parse.  Just a few function calls.  Most of the required
functionality exists in 'lib' code.

If you don't like a separate app or script, this information could be printed
in a flow-dump in a some special field named like 'dp-extra-info' or whatever.
Like this:
  
recirc_id(0),in_port(1),packet_type(ns=0,id=0),eth_type(0x0800),ipv4(frag=no), 
dp-extra-info:miniflow_bits(0x5, 0x1), actions:hash(sym_l4(0)),recirc(0x1)

'dp-extra-info' might be printed with verbosity enabled if dpif provided this
information.

I really feel that this information (miniflow bits) should be bonded with 
flow-dump
somehow just because it belongs there.

> 
> After posting the RFC we had a number of users already applying the patch and 
> using it in their deployments, we spoke about it at the conference and didn't 
> hear any objections so I this is why the patch has continued with this 
> approach for the 2.13 release.

They didn't have any alternatives to use. =)

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] thundering herd wakeup of handler threads

2020-01-08 Thread Aaron Conole
David Ahern  writes:

> On 12/16/19 2:42 PM, Aaron Conole wrote:
>> Can you try the following and see if your scalability issue is
>> addressed?  I think it could be better integrated, but this is a
>> different quick 'n dirty.
>
> your patch reduces the number of threads awakened, but it is still
> really high - 43 out of 71 handler threads in one test.

Thanks for getting back to me (sorry for the late response, catching
up).  I'll take a closer look.  I haven't thought about adding
EPOLLEXCLUSIVE to the poll block change, so maybe that will address it
completely, or maybe there's a different approach.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] dpif-netdev: Get rid of broken dpif pointer in dp_netdev structure.

2020-01-08 Thread Ilya Maximets
On 07.01.2020 20:19, Ben Pfaff wrote:
> On Sun, Dec 08, 2019 at 08:33:46PM +0100, Ilya Maximets wrote:
>> This pointer was introduced in July 2014 by commit
>> 6b31e07347ad ("dpif-netdev: Polling threads directly call ofproto upcall 
>> functions.")
>> and it was broken right from this point because dpif_netdev_open()
>> updates it on each call with the pointer to a newly allocated
>> 'dpif' structure that becomes invalid on the next dpif_netdev_close().
>> Since dpif_open/close() always happens asynchronously from different
>> threads and pointer is not protected by rcu or mutex (it's not even
>> atomic) it's not possible to safely use it.  Thankfully the actual
>> usage was in repository for less than 3 weeks and was removed by
>> commit 623540e4617e ("dpif-netdev: Streamline miss handling.").  Until
>> recently this pointer was used in order to pass it to dpif_flow_hash().
>> Another luck is that dpif_flow_hash() didn't use the 'dpif' argument.
>>
>> However, we tried to use it while netdev offloading by commit
>> 30115809da2e ("dpif-netdev: Use netdev-offload API for port lookup while 
>> offloading.")
>> and that unveiled the issue.
>>
>> Now that all the code that used this pointer was cleaned up we can
>> just remove it from the structure to avoid possible misuse in the
>> future.
>>
>> Signed-off-by: Ilya Maximets 
> 
> Good fix.  Thanks.
> 
> Acked-by: Ben Pfaff 
> 

Thanks!  I applied both patches to master.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] acinclude: Use RTE_IBVERBS_LINK_DLOPEN

2020-01-08 Thread Aaron Conole
Timothy Redaelli  writes:

> On DPDK 19.11 RTE_IBVERBS_LINK_DLOPEN is used by Mellanox PMDs (mlx4 and
> mlx5) instead of RTE_LIBRTE_MLX{4,5}_DLOPEN_DEPS.
>
> Without this commit is not possible to statically link OVS with DPDK when MLX4
> or MLX5 PMDs are enabled.
>
> Signed-off-by: Timothy Redaelli 
> ---

Acked-by: Aaron Conole 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] acinclude: Use RTE_IBVERBS_LINK_DLOPEN

2020-01-08 Thread David Marchand
On Wed, Jan 8, 2020 at 1:42 PM Timothy Redaelli  wrote:
> On DPDK 19.11 RTE_IBVERBS_LINK_DLOPEN is used by Mellanox PMDs (mlx4 and
> mlx5) instead of RTE_LIBRTE_MLX{4,5}_DLOPEN_DEPS.
>
> Without this commit is not possible to statically link OVS with DPDK when MLX4
> or MLX5 PMDs are enabled.
>
> Signed-off-by: Timothy Redaelli 
> ---
>  acinclude.m4 | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/acinclude.m4 b/acinclude.m4
> index 18264c43b..1c8791d53 100644
> --- a/acinclude.m4
> +++ b/acinclude.m4
> @@ -378,14 +378,14 @@ AC_DEFUN([OVS_CHECK_DPDK], [
>
>  AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found
>OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl])
> -  AC_CHECK_DECL([RTE_LIBRTE_MLX5_DLOPEN_DEPS], [], [dnl not found
> +  AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found
>  OVS_FIND_DEPENDENCY([mlx5dv_create_wq], [mlx5], [libmlx5])
>  OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs])
>], [[#include ]])
>  ], [], [[#include ]])
>
>  AC_CHECK_DECL([RTE_LIBRTE_MLX4_PMD], [dnl found
> -  AC_CHECK_DECL([RTE_LIBRTE_MLX4_DLOPEN_DEPS], [], [dnl not found
> +  AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found
>  OVS_FIND_DEPENDENCY([mlx4dv_init_obj], [mlx4], [libmlx4])
>  OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs])
>], [[#include ]])

LGTM.
Acked-by: David Marchand 


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest v2 0/4] Add support for TSO with DPDK

2020-01-08 Thread Ilya Maximets
On 31.12.2019 21:14, Flavio Leitner wrote:
> Abbreviated as TSO, TCP Segmentation Offload is a feature which enables
> the network stack to delegate the TCP segmentation to the NIC reducing
> the per packet CPU overhead.
> 
> A guest using vhost-user interface with TSO enabled can send TCP packets
> much bigger than the MTU, which saves CPU cycles normally used to break
> the packets down to MTU size and to calculate checksums.
> 
> It also saves CPU cycles used to parse multiple packets/headers during
> the packet processing inside virtual switch.
> 
> If the destination of the packet is another guest in the same host, then
> the same big packet can be sent through a vhost-user interface skipping
> the segmentation completely. However, if the destination is not local,
> the NIC hardware is instructed to do the TCP segmentation and checksum
> calculation.
> 
> The first 3 patches are not really part of TSO support, but they are
> required to make sure everything works.
> 
> The TSO is enabled in the vhost-user ports in client mode and linux ports
> in userspace.
> 
> This patchset is based on branch dpdk-latest (v19.11 required).
> 
> There are good improvements sending to or receiving from veth pairs or
> tap devices as well.
> 
> Flavio Leitner (4):
>   dp-packet: preserve headroom when cloning a pkt batch
>   vhost: Disable multi-segmented buffers
>   dp-packet: handle new dpdk buffer flags
>   netdev-dpdk: Add TCP Segmentation Offload support
> 
>  Documentation/automake.mk   |   1 +
>  Documentation/topics/dpdk/index.rst |   1 +
>  Documentation/topics/dpdk/tso.rst   |  89 
>  NEWS|   1 +
>  lib/automake.mk |   2 +
>  lib/conntrack.c |  29 ++-
>  lib/dp-packet.c |   4 +-
>  lib/dp-packet.h | 160 +-
>  lib/ipf.c   |  32 +--
>  lib/netdev-dpdk.c   | 317 
>  lib/netdev-linux-private.h  |   4 +
>  lib/netdev-linux.c  | 295 +++---
>  lib/netdev-provider.h   |  10 +
>  lib/netdev.c|  52 -
>  lib/tso.c   |  54 +
>  lib/tso.h   |  23 ++
>  vswitchd/bridge.c   |   2 +
>  vswitchd/vswitch.xml|  12 ++
>  18 files changed, 997 insertions(+), 91 deletions(-)
>  create mode 100644 Documentation/topics/dpdk/tso.rst
>  create mode 100644 lib/tso.c
>  create mode 100644 lib/tso.h
> 

Hi, Flavio.
I didn't look through the code yet, but since you're probably preparing
v3 based on review from Ian, please, fix sparse issues too.
It complains about types for virtio header:

https://travis-ci.org/ovsrobot/ovs/jobs/631404835

lib/netdev-linux.c:6399:23: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6399:23:expected restricted __virtio16 [usertype] hdr_len
lib/netdev-linux.c:6399:23:got long
lib/netdev-linux.c:6400:23: error: bad assignment (+=) to restricted __virtio16
lib/netdev-linux.c:6401:36: error: restricted __virtio16 degrades to integer
lib/netdev-linux.c:6401:24: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6401:24:expected restricted __virtio16 [usertype] 
gso_size
lib/netdev-linux.c:6401:24:got int
lib/netdev-linux.c:6414:26: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6414:26:expected restricted __virtio16 [usertype] 
csum_start
lib/netdev-linux.c:6414:26:got long
lib/netdev-linux.c:6417:31: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6417:31:expected restricted __virtio16 [usertype] 
csum_offset
lib/netdev-linux.c:6417:31:got unsigned long
lib/netdev-linux.c:6420:31: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6420:31:expected restricted __virtio16 [usertype] 
csum_offset
lib/netdev-linux.c:6420:31:got unsigned long
lib/netdev-linux.c:6423:31: error: incorrect type in assignment (different base 
types)
lib/netdev-linux.c:6423:31:expected restricted __virtio16 [usertype] 
csum_offset
lib/netdev-linux.c:6423:31:got unsigned long

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] acinclude: Use RTE_IBVERBS_LINK_DLOPEN

2020-01-08 Thread Timothy Redaelli
On DPDK 19.11 RTE_IBVERBS_LINK_DLOPEN is used by Mellanox PMDs (mlx4 and
mlx5) instead of RTE_LIBRTE_MLX{4,5}_DLOPEN_DEPS.

Without this commit is not possible to statically link OVS with DPDK when MLX4
or MLX5 PMDs are enabled.

Signed-off-by: Timothy Redaelli 
---
 acinclude.m4 | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/acinclude.m4 b/acinclude.m4
index 18264c43b..1c8791d53 100644
--- a/acinclude.m4
+++ b/acinclude.m4
@@ -378,14 +378,14 @@ AC_DEFUN([OVS_CHECK_DPDK], [
 
 AC_CHECK_DECL([RTE_LIBRTE_MLX5_PMD], [dnl found
   OVS_FIND_DEPENDENCY([mnl_attr_put], [mnl], [libmnl])
-  AC_CHECK_DECL([RTE_LIBRTE_MLX5_DLOPEN_DEPS], [], [dnl not found
+  AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found
 OVS_FIND_DEPENDENCY([mlx5dv_create_wq], [mlx5], [libmlx5])
 OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs])
   ], [[#include ]])
 ], [], [[#include ]])
 
 AC_CHECK_DECL([RTE_LIBRTE_MLX4_PMD], [dnl found
-  AC_CHECK_DECL([RTE_LIBRTE_MLX4_DLOPEN_DEPS], [], [dnl not found
+  AC_CHECK_DECL([RTE_IBVERBS_LINK_DLOPEN], [], [dnl not found
 OVS_FIND_DEPENDENCY([mlx4dv_init_obj], [mlx4], [libmlx4])
 OVS_FIND_DEPENDENCY([verbs_init_cq], [ibverbs], [libibverbs])
   ], [[#include ]])
-- 
2.24.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V6 09/18] dpctl: Support dump-flows filters "dpdk" and "partially-offloaded"

2020-01-08 Thread Ilya Maximets
On 19.12.2019 12:54, Eli Britstein wrote:
> Flows that are offloaded via DPDK can be partially offloaded (matches
> only) or fully offloaded (matches and actions). Set partially offloaded
> display to (offloaded=partial, dp:ovs), and fully offloaded to
> (offloaded=yes, dp:dpdk). Also support filter types "dpdk" and
> "partially-offloaded".
> 
> Signed-off-by: Eli Britstein 
> ---
>  NEWS  |  3 ++
>  lib/dpctl.c   | 97 
> ---
>  lib/dpctl.man |  2 ++
>  3 files changed, 78 insertions(+), 24 deletions(-)
> 
> diff --git a/NEWS b/NEWS
> index 2acde3fe8..85c4a4812 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -26,6 +26,9 @@ Post-v2.12.0
>   * DPDK ring ports (dpdkr) are deprecated and will be removed in next
> releases.
>   * Add support for DPDK 19.11.
> +   - 'ovs-appctl dpctl/dump-flows' can now show offloaded=partial for
> + partially offloaded flows, dp:dpdk for fully offloaded by dpdk, and
> + type filter supports new filters: "dpdk" and "partially-offloaded".
>  
>  v2.12.0 - 03 Sep 2019
>  -
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 0ee64718c..387f61ed4 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -818,7 +818,11 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow 
> *f, struct hmap *ports,
>  
>  dpif_flow_stats_format(>stats, ds);
>  if (dpctl_p->verbosity && f->attrs.offloaded) {
> -ds_put_cstr(ds, ", offloaded:yes");
> +if (f->attrs.dp_layer && !strcmp(f->attrs.dp_layer, "ovs")) {
> +ds_put_cstr(ds, ", offloaded:partial");
> +} else {
> +ds_put_cstr(ds, ", offloaded:yes");
> +}
>  }
>  if (dpctl_p->verbosity && f->attrs.dp_layer) {
>  ds_put_format(ds, ", dp:%s", f->attrs.dp_layer);
> @@ -827,20 +831,30 @@ format_dpif_flow(struct ds *ds, const struct dpif_flow 
> *f, struct hmap *ports,
>  format_odp_actions(ds, f->actions, f->actions_len, ports);
>  }
>  
> +enum dp_type {
> +DP_TYPE_ANY,
> +DP_TYPE_OVS,
> +DP_TYPE_TC,
> +DP_TYPE_DPDK,
> +};
> +
> +enum ol_type {
> +OL_TYPE_ANY,
> +OL_TYPE_NO,
> +OL_TYPE_YES,
> +OL_TYPE_PARTIAL,
> +};
> +
>  struct dump_types {
> -bool ovs;
> -bool tc;
> -bool offloaded;
> -bool non_offloaded;
> +enum dp_type dp_type;
> +enum ol_type ol_type;
>  };
>  
>  static void
>  enable_all_dump_types(struct dump_types *dump_types)
>  {
> -dump_types->ovs = true;
> -dump_types->tc = true;
> -dump_types->offloaded = true;
> -dump_types->non_offloaded = true;
> +dump_types->dp_type = DP_TYPE_ANY;
> +dump_types->ol_type = OL_TYPE_ANY;
>  }
>  
>  static int
> @@ -862,13 +876,17 @@ populate_dump_types(char *types_list, struct dump_types 
> *dump_types,
>  current_type[type_len] = '\0';
>  
>  if (!strcmp(current_type, "ovs")) {
> -dump_types->ovs = true;
> +dump_types->dp_type = DP_TYPE_OVS;
>  } else if (!strcmp(current_type, "tc")) {
> -dump_types->tc = true;
> +dump_types->dp_type = DP_TYPE_TC;
> +} else if (!strcmp(current_type, "dpdk")) {
> +dump_types->dp_type = DP_TYPE_DPDK;
>  } else if (!strcmp(current_type, "offloaded")) {
> -dump_types->offloaded = true;
> +dump_types->ol_type = OL_TYPE_YES;
>  } else if (!strcmp(current_type, "non-offloaded")) {
> -dump_types->non_offloaded = true;
> +dump_types->ol_type = OL_TYPE_NO;
> +} else if (!strcmp(current_type, "partially-offloaded")) {
> +dump_types->ol_type = OL_TYPE_PARTIAL;
>  } else if (!strcmp(current_type, "all")) {
>  enable_all_dump_types(dump_types);
>  } else {
> @@ -884,30 +902,61 @@ static void
>  determine_dpif_flow_dump_types(struct dump_types *dump_types,
> struct dpif_flow_dump_types *dpif_dump_types)
>  {
> -dpif_dump_types->ovs_flows = dump_types->ovs || 
> dump_types->non_offloaded;
> -dpif_dump_types->netdev_flows = dump_types->tc || dump_types->offloaded
> -|| dump_types->non_offloaded;
> +dpif_dump_types->ovs_flows = dump_types->dp_type == DP_TYPE_OVS;
> +dpif_dump_types->netdev_flows = (dump_types->dp_type == DP_TYPE_TC ||
> + dump_types->dp_type == DP_TYPE_DPDK);
>  }

Above code doesn't handle DP_TYPE_ANY.  This mostly breaks dump-flows
command if no type specified.

Some more issues:  I think this patch will not handle multiple flow
types correctly.  For example, "dump-flows --type=tc,dpdk" should
dump "flows that are in TC, but not in HW" + "flows that are in HW via
DPDK". So, it should not dump flows that handled purely by OVS or
offloaded to HW via TC.  I believe, this case will not work correctly
with current implementation of this patch.

>  
>  static bool
> -flow_passes_type_filter(const 

Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-netdev/subtable-show.

2020-01-08 Thread Finn, Emma


> -Original Message-
> From: Stokes, Ian 
> Sent: Tuesday 7 January 2020 10:21
> To: Finn, Emma ; u9012...@gmail.com;
> i.maxim...@ovn.org; ovs-dev@openvswitch.org
> Subject: Re: [ovs-dev] [v1] dpif-netdev: Add ovs-appctl dpif-
> netdev/subtable-show.
> 
> 
> 
> On 12/20/2019 3:28 PM, Emma Finn wrote:
> > Add an ovs-appctl command to iterate through the dpcls and for each
> > subtable output the miniflow bits for any existing table.
> >
> > $ ovs-appctl dpif-netdev/subtable-show pmd thread numa_id 0
> >dpcls port 2:
> >  subtable:
> >unit_0: 2 (0x5)
> >unit_1: 1 (0x1)
> > pmd thread numa_id 1
> >dpcls port 3:
> >  subtable:
> >unit_0: 2 (0x5)
> >unit_1: 1 (0x1)
> >
> > Signed-off-by: Emma Finn 
> >
> 
> Hi Emma,
> 
> thanks for the patch, comments below.
> 
> 
> > ---
> >
> > RFC -> v1
> >
> > * Changed revision from RFC to v1
> > * Reformatted based on comments
> > * Fixed same classifier being dumped multiple times
> >flagged by Ilya
> > * Fixed print of subtables flagged by William
> > * Updated print count of bits as well as bits themselves
> > ---
> >   NEWS|  2 ++
> >   lib/dpif-netdev-unixctl.man |  4 
> >   lib/dpif-netdev.c   | 53
> -
> >   3 files changed, 58 insertions(+), 1 deletion(-)
> >
> > diff --git a/NEWS b/NEWS
> > index 2acde3f..2b7b0cc 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -26,6 +26,8 @@ Post-v2.12.0
> >* DPDK ring ports (dpdkr) are deprecated and will be removed in next
> >  releases.
> >* Add support for DPDK 19.11.
> > + * New "ovs-appctl dpif-netdev/subtable-show" command for
> userspace
> > +   datapath to show subtable miniflow bits.
> 
> Will need to rebase to master as new items have been added in NEWS. Also I
> don't think this belongs under the DPDK section, rather the userspace
> header should be added and used i.e.
> 
> +   - Userspace datapath:
> + * New "ovs-appctl dpif-netdev/subtable-show" command for userspace
> +   datapath to show subtable miniflow bits.
> 
> >
> >   v2.12.0 - 03 Sep 2019
> >   -
> > diff --git a/lib/dpif-netdev-unixctl.man b/lib/dpif-netdev-unixctl.man
> > index 6c54f6f..c443465 100644
> > --- a/lib/dpif-netdev-unixctl.man
> > +++ b/lib/dpif-netdev-unixctl.man
> > @@ -217,3 +217,7 @@ with port names, which this thread polls.
> >   .
> >   .IP "\fBdpif-netdev/pmd-rxq-rebalance\fR [\fIdp\fR]"
> >   Reassigns rxqs to pmds in the datapath \fIdp\fR based on their current
> usage.
> > +.
> > +.IP "\fBdpif-netdev/subtable-show\fR [\fB-pmd\fR \fIcore\fR] [\fIdp\fR]"
> > +For one or all pmd threads of the datapath \fIdp\fR show the list of
> > +miniflow bits for each subtable in the datapath classifier.
> > \ No newline at end of file
> > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
> > 8485b54..a166b9e 100644
> > --- a/lib/dpif-netdev.c
> > +++ b/lib/dpif-netdev.c
> > @@ -857,6 +857,8 @@ static inline bool
> >   pmd_perf_metrics_enabled(const struct dp_netdev_pmd_thread *pmd);
> >   static void queue_netdev_flow_del(struct dp_netdev_pmd_thread
> *pmd,
> > struct dp_netdev_flow *flow);
> > +static void pmd_info_show_subtable(struct ds *reply,
> > +   struct dp_netdev_pmd_thread *pmd);
> >
> >   static void
> >   emc_cache_init(struct emc_cache *flow_cache) @@ -979,6 +981,7 @@
> > enum pmd_info_type {
> >   PMD_INFO_CLEAR_STATS, /* Set the cycles count to 0. */
> >   PMD_INFO_SHOW_RXQ,/* Show poll lists of pmd threads. */
> >   PMD_INFO_PERF_SHOW,   /* Show pmd performance details. */
> > +PMD_INFO_SHOW_SUBTABLE, /* Show subtable miniflow bits. */
> >   };
> >
> >   static void
> > @@ -1334,6 +1337,8 @@ dpif_netdev_pmd_info(struct unixctl_conn
> *conn, int argc, const char *argv[],
> >   pmd_info_show_stats(, pmd);
> >   } else if (type == PMD_INFO_PERF_SHOW) {
> >   pmd_info_show_perf(, pmd, (struct pmd_perf_params
> > *)aux);
> > +} else if (type == PMD_INFO_SHOW_SUBTABLE) {
> > +pmd_info_show_subtable(, pmd);
> >   }
> >   }
> >   free(pmd_list);
> > @@ -1391,7 +1396,8 @@ dpif_netdev_init(void)
> >   {
> >   static enum pmd_info_type show_aux = PMD_INFO_SHOW_STATS,
> > clear_aux = PMD_INFO_CLEAR_STATS,
> > -  poll_aux = PMD_INFO_SHOW_RXQ;
> > +  poll_aux = PMD_INFO_SHOW_RXQ,
> > +  subtable_aux = PMD_INFO_SHOW_SUBTABLE;
> >
> >   unixctl_command_register("dpif-netdev/pmd-stats-show", "[-pmd
> core] [dp]",
> >0, 3, dpif_netdev_pmd_info, @@ -1416,6
> > +1422,9 @@ dpif_netdev_init(void)
> >"[-us usec] [-q qlen]",
> >0, 10, pmd_perf_log_set_cmd,
> >   

Re: [ovs-dev] Question on ovsdb_idl_omit_alert()

2020-01-08 Thread Numan Siddique
On Wed, Jan 8, 2020 at 1:23 PM Han Zhou  wrote:
>
> On Thu, Jan 2, 2020 at 11:51 AM Numan Siddique  wrote:
> >
> > On Fri, Jan 3, 2020 at 1:00 AM Ben Pfaff  wrote:
> > >
> > > On Fri, Jan 03, 2020 at 12:45:06AM +0530, Numan Siddique wrote:
> > > > Hi Ben,
> > > >
> > > > ovn-controller.c has the below code here [1]
> > > >
> > > > ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, _chassis_col_nb_cfg);
> > > >
> > > > From what I understand from this comment [2], when ovn-controller
> > > > updates this column, SB ovsdb-server shouldn't send the
> > > > update2/update3 message back to the ovn-controller and to all the
> > > > other ovn-controllers right ?
> > > >
> > > > But I don't see it working as expected. I can see that ovsdb-server
> > > > sends the update2/update3 message to all the connected clients.
> > > >
> > > > Is this a bug ? Or is my understanding wrong ?
> > >
> > > I think that is a misunderstanding.  "Omit alert" does not suppress
> > > messages from ovsdb-server.  Instead, it keeps the ovsdb IDL from
> > > incrementing the sequence number that indicates that something has
> > > changed in the database, that is, it keeps from alerting the IDL's
> > > client that something has changed.
> >
> > Thanks for the reply and clearing the misunderstanding.
> >
> > Numan
> >
>
> Hi Numan,
>
> I had sent a patch to improve this [0], and the last discussion was [1].
>
> Ben, if you remember, you suggested about improving the conditional monitor
> in ovsdb to avoid this problem [1]. However, after checking the conditional
> monitor code I feel the change is not trivial and in fact I didn't find a
> proper way to do it without sacrificing ovsdb performance. So I think the
> simple change in the DB schema [0] would worth it. Please let me know if
> you would like to revisit it.

Thanks Han for sharing this. I guess you have already hit this problem
a long time ago
which we are seeing now in our scale testing.

I am supportive of adding a new chassis private table and I think it's
worth revisiting it.

Thanks
Numan


>
> [0] https://patchwork.ozlabs.org/patch/899608/
> [1]
> https://lists.linuxfoundation.org/pipermail/ovs-dev/2018-April/346206.html
>
> Thanks,
> Han
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2 ovn 0/2] fix DNSSL and Route Info Option in RA

2020-01-08 Thread Numan Siddique
On Tue, Jan 7, 2020 at 10:20 PM Lorenzo Bianconi
 wrote:
>
> Changes since v1:
> - rely on xstrdup instead of ds_clone
>
> Lorenzo Bianconi (2):
>   DNSSL: copy dnssl string in order to avoid truncated value
>   RA Route Info Option: copy route info string in order to avoid
> truncated value

Thanks Lorenzo for the patches. I applied these to master.

Numan

>
>  controller/pinctrl.c | 33 +
>  1 file changed, 21 insertions(+), 12 deletions(-)
>
> --
> 2.21.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [RFC] odp-execute: dp_hash HW offload inconsistency.

2020-01-08 Thread Roni Bar Yanai
Hi Ilya,

I have a setup with vxlan,  and then I do dp_hash (after pop).
First packet does pop in SW, so RSS is invalidated (and anyway it was probably
 calculated on outer),  dp_hash is calculated again SW.
Following packet will do decap in HW so packet has a valid internal RSS 
(mark+rss),
And it is used as the hash. I have inconsistency between the packets that passed
 through SW and packet that were offloaded with the same 5-tuple (internal).

I know vxlan pop is not offload yet, but we hope it will soon.
I thought of solving this issue by calling netdev_offload_dpdk layer to calc 
the hash in case
when HW offload Is enabled. So it will be the same hash for both cases.
dp_hash HW offload without a tunnel, will have the same issue, as HW will use 
some hash not
 necessarily the same as RSS configuration, and even if RSS is used, in SW 
there is another
salt on top of that, so packet with same 5-tuple offload vs not offloaded will 
get different hash.

diff --git a/lib/odp-execute.c b/lib/odp-execute.c
index 563ad1d..26dc907 100644
--- a/lib/odp-execute.c
+++ b/lib/odp-execute.c
@@ -822,14 +822,18 @@ odp_execute_actions(void *dp, struct dp_packet_batch 
*batch, bool steal,
 DP_PACKET_BATCH_FOR_EACH (i, packet, batch) {
 /* RSS hash can be used here instead of 5tuple for
  * performance reasons. */
-if (dp_packet_rss_valid(packet)) {
-hash = dp_packet_get_rss_hash(packet);
-hash = hash_int(hash, hash_act->hash_basis);
+if (netdev_is_flow_api_enabled()) {
+hash = netdev_offload_dpdk_set_dp_hash(packet);
 } else {
-flow_extract(packet, );
-hash = flow_hash_5tuple(, hash_act->hash_basis);
-}
-packet->md.dp_hash = hash;
+
+if (dp_packet_rss_valid(packet)) {
+hash = dp_packet_get_rss_hash(packet);
+hash = hash_int(hash, hash_act->hash_basis);
+} else {
+flow_extract(packet, );
+hash = flow_hash_5tuple(, 
hash_act->hash_basis);
+}
+packet->md.dp_hash = hash;
 }
 break;
 }

What do you think?

BR,
Roni

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] OVS 2.13 Soft Freeze date

2020-01-08 Thread Stokes, Ian

Hi Ben,

just wanted to confirm if the soft freeze for OVS 2.13 is still 
confirmed for January 15th? Just a had a few queries so thought it would 
be best to ask.


Regards
Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH dpdk-latest v2 4/4] netdev-dpdk: Add TCP Segmentation Offload support

2020-01-08 Thread Stokes, Ian




On 1/7/2020 6:09 PM, Flavio Leitner wrote:


Hi Ian,

Thanks for the reviews. I agree with your comments for the other
patches. This one I will answer them inline.



Thanks Flavio,

below seems good to me, I know Ciara is testing this (and will test the 
latest revsion also with a few different NICs). All going well we will 
look at the v3 and hopefully be in position to accept for 2.13 (code 
freeze is January 15th, but let me confirm with Ben).


Regards
Ian



On Mon, Jan 06, 2020 at 08:24:48PM +, Stokes, Ian wrote:



On 12/31/2019 8:14 PM, Flavio Leitner wrote:

Abbreviated as TSO, TCP Segmentation Offload is a feature which enables
the network stack to delegate the TCP segmentation to the NIC reducing
the per packet CPU overhead.

A guest using vhostuser interface with TSO enabled can send TCP packets
much bigger than the MTU, which saves CPU cycles normally used to break
the packets down to MTU size and to calculate checksums.

It also saves CPU cycles used to parse multiple packets/headers during
the packet processing inside virtual switch.

If the destination of the packet is another guest in the same host, then
the same big packet can be sent through a vhostuser interface skipping
the segmentation completely. However, if the destination is not local,
the NIC hardware is instructed to do the TCP segmentation and checksum
calculation.

It is recommended to check if NIC hardware supports TSO before enabling
the feature, which is off by default.


Might be useful to add a link to the DPDK Networking Device feature support
as it lists whether TSO is supported for a given device currently

https://doc.dpdk.org/guides/nics/overview.html


Yeah, will add to the documentation.




Signed-off-by: Flavio Leitner 
---
   Documentation/automake.mk   |   1 +
   Documentation/topics/dpdk/index.rst |   1 +
   Documentation/topics/dpdk/tso.rst   |  89 
   NEWS|   1 +
   lib/automake.mk |   2 +
   lib/conntrack.c |  29 ++-
   lib/dp-packet.h | 152 +-
   lib/ipf.c   |  32 +--
   lib/netdev-dpdk.c   | 311 
   lib/netdev-linux-private.h  |   4 +
   lib/netdev-linux.c  | 295 +++---
   lib/netdev-provider.h   |  10 +
   lib/netdev.c|  52 -
   lib/tso.c   |  54 +
   lib/tso.h   |  23 ++
   vswitchd/bridge.c   |   2 +
   vswitchd/vswitch.xml|  12 ++
   17 files changed, 982 insertions(+), 88 deletions(-)
   create mode 100644 Documentation/topics/dpdk/tso.rst
   create mode 100644 lib/tso.c
   create mode 100644 lib/tso.h

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index 5f7c3e07b..abe5aaed1 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -35,6 +35,7 @@ DOC_SOURCE = \
Documentation/topics/dpdk/index.rst \
Documentation/topics/dpdk/bridge.rst \
Documentation/topics/dpdk/jumbo-frames.rst \
+   Documentation/topics/dpdk/tso.rst \
Documentation/topics/dpdk/memory.rst \
Documentation/topics/dpdk/pdump.rst \
Documentation/topics/dpdk/phy.rst \
diff --git a/Documentation/topics/dpdk/index.rst 
b/Documentation/topics/dpdk/index.rst
index f2862ea70..400d56051 100644
--- a/Documentation/topics/dpdk/index.rst
+++ b/Documentation/topics/dpdk/index.rst
@@ -40,4 +40,5 @@ DPDK Support
  /topics/dpdk/qos
  /topics/dpdk/pdump
  /topics/dpdk/jumbo-frames
+   /topics/dpdk/tso
  /topics/dpdk/memory
diff --git a/Documentation/topics/dpdk/tso.rst 
b/Documentation/topics/dpdk/tso.rst
new file mode 100644
index 0..0724513bd
--- /dev/null
+++ b/Documentation/topics/dpdk/tso.rst
@@ -0,0 +1,89 @@
+..
+  Copyright 2019, Red Hat, Inc.

Minor but probably 2020 above now for the next revision, goes for the other
new files added also.


Oh, right, going to update that.



+
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in Open vSwitch documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+

Re: [ovs-dev] [PATCH 1/1] vswitchd: Allow setting MAC on DPDK interfaces

2020-01-08 Thread Ophir Munk
Hi,
There is an important use case for having OVS change MAC addresses of dpdk 
interfaces. 
OpenStack for example needs to update the MAC address of a VF assigned to a VM, 
where the corresponding VF representor is owned by dpdk. 
For some NIC vendors using "ifconfig" or "ip" commands - is not an option (if 
the NIC is not bifurcated). 
Therefore OpenStack should use the OVS API to set the MAC address for dpdk 
interfaces.
Along with Ben's explanation it seems right to only allow "internal" or "dpdk" 
port types to set the MAC.

Testing both patches [1] and [2] - passed successfully.
Acked-by: Ophir Munk 

I hope patches [1] and [2] can be merged to master.

[1]
https://patchwork.ozlabs.org/patch/1186896/
("[ovs-dev,v2] netdev-dpdk: Add ability to set MAC address.")

[2]
https://patchwork.ozlabs.org/patch/1215075/
("[ovs-dev,1/1] vswitchd: Allow setting MAC on DPDK interfaces")

> -Original Message-
> From: Ben Pfaff 
> Sent: Tuesday, January 7, 2020 2:26 AM
> To: Ilya Maximets 
> Cc: Eveline Raine ; d...@openvswitch.org; Moshe
> Levi ; Adrian Chiris ;
> Ophir Munk ; Majd Dibbiny
> ; Roni Bar Yanai ; Ameer
> Mahagneh 
> Subject: Re: [PATCH 1/1] vswitchd: Allow setting MAC on DPDK interfaces
> 
> On Fri, Jan 03, 2020 at 03:56:59PM +0100, Ilya Maximets wrote:
> > Ben, do you see any other drawbacks that we should handle if we'll
> > allow changing MAC addresses for non-internal ports?  Or, maybe some
> > issues with my logic?
> 
> It can cause surprises for interactions with regular system tools.
> Anyone who uses "ip" or "ifconfig" to change the MAC will find it changed
> back later (perhaps not immediately).  And if you un-set it in the database,
> OVS doesn't know what to change it back to.
> 
> These drawbacks aren't there in the same way for devices that OVS "owns"
> like internal devices or dpdk devices.  Well, I guess they are for OVS 
> internal
> devices to some extent, but for those OVS has some responsibility to pick a
> reasonable MAC address to begin with.  If OVS doesn't, then it causes
> confusion of its own through things like having a machine's MAC address
> change if you create a bridge and move a physical NIC onto it.  We had lots of
> experience with that early on with OVS.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] compat: Include confirm_neigh parameter if needed

2020-01-08 Thread Simon Horman
On Tue, Jan 07, 2020 at 06:13:32PM +0100, Ilya Maximets wrote:
> On 07.01.2020 18:08, Simon Horman wrote:
> > On Tue, Jan 07, 2020 at 10:34:37AM +0100, Simon Horman wrote:
> >> On Mon, Jan 06, 2020 at 01:36:34PM -0800, Greg Rose wrote:
> >>> A change backported to the Linux 4.14.162 LTS kernel requires
> >>> a boolean parameter.  Check for the presence of the parameter
> >>> and adjust the caller in that case.
> >>>
> >>> Passes check-kmod test with no regressions.
> >>>
> >>> Passes Travis build here:
> >>> https://travis-ci.org/gvrose8192/ovs-experimental/builds/633461320
> >>>
> >>> Signed-off-by: Greg Rose 
> >>
> >> Thanks Greg,
> >>
> >> I have pushed this to master with a view to quickly resolving
> >> the build problem there.
> >>
> >> I also plan to push the change to to branch-2.10 ... 2.12
> >> once I've confirmed the problem exists there and testing of those backports
> >> is complete:
> >>
> >>   branch-2.12: https://travis-ci.org/horms2/ovs/builds/633651396
> >>   branch-2.12 + backport: https://travis-ci.org/horms2/ovs/builds/633674666
> >>   branch-2.11: https://travis-ci.org/horms2/ovs/builds/633651456
> >>   branch-2.11 + backport: https://travis-ci.org/horms2/ovs/builds/633676376
> >>   branch-2.10: https://travis-ci.org/horms2/ovs/builds/633676916
> >>   branch-2.10 + backport: https://travis-ci.org/horms2/ovs/builds/633677490
> > 
> > Empirically it appears the backport is only needed
> > for branch-2.12, I have pushed it there.
> > 
> 
> We might still consider backporting.  Travis doesn't fail on branches
> before 2.12 only because commit c94e2d64f05e ("travis: Test with latest
> stable kernel releases.") was introduced in 2.12. On earlier branches
> we're using older versions of stable kernels.  These branches could still
> fail to build if we'll try to build them with 4.14.162.

Thanks, I tested that locally by building branch-2.10 and branch-2.11
locally against v4.14.161 and 4.14.162. Accordingly I have
pushed the backport to those branches.

FWIIW, I noticed that branch-2.9 does not build against newer v4.14
releases. But the problem discussed in this thread does not seem
relevant there.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [ovs-dev v2] dpif-netdev: Allow to set max capacity of flow on netdev

2020-01-08 Thread 0-day Robot
Bleep bloop.  Greetings Tonghao Zhang, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line has non-spaces leading whitespace
WARNING: Line has trailing whitespace
#54 FILE: lib/dpif-netdev.c:1141:


Lines checked: 113, Warnings: 2, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: Allow to set max capacity of flow on netdev

2020-01-08 Thread Tonghao Zhang
On Wed, Jan 8, 2020 at 6:34 AM Ben Pfaff  wrote:
>
> On Mon, Dec 23, 2019 at 07:17:34PM +0800, xiangxia.m@gmail.com wrote:
> > From: Tonghao Zhang 
> >
> > There may be too many flows (> MAX_FLOWS 65536) on
> > dpif-netdev at same time. For this case, we support
> > the ovs-appctl command to change the flow max number.
> >
> > Signed-off-by: Tonghao Zhang 
>
> Thanks for the patch.
>
> I think that netdev_max_flows should be an atomic_uint32_t because it is
> accessed from multiple threads.
>
> Usually the replies to appctl commands are very terse, so that a
> successful reply to change a value would just be empty and the reply for
> getting a value would just be the raw value, and usually we don't
> include a new-line at the end.
v2 is sent, pls help me review this patch:
http://patchwork.ozlabs.org/patch/1219494/

Thanks.
> Thanks,
>
> Ben.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [ovs-dev v2] dpif-netdev: Allow to set max capacity of flow on netdev

2020-01-08 Thread xiangxia . m . yue
From: Tonghao Zhang 

For installing more than MAX_FLOWS (65536) flows to netdev datapath.
Add the ovs-appctl subcommand "dpif-netdev/pmd-set-max-flow" which
can change the flow number which netdev datapath support.

Signed-off-by: Tonghao Zhang 
---
v2:
* change int type to atomic_uint32_t
* check max flow number is whether valid (0 < max-flow < UINT_MAX).
---
 lib/dpif-netdev.c | 50 +--
 1 file changed, 48 insertions(+), 2 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 8485b54db0d8..b5ce70f7d1d1 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -97,7 +97,6 @@ DEFINE_STATIC_PER_THREAD_DATA(uint32_t, recirc_depth, 0)
 #define DEFAULT_TX_FLUSH_INTERVAL 0
 
 /* Configuration parameters. */
-enum { MAX_FLOWS = 65536 }; /* Maximum number of flows in flow table. */
 enum { MAX_METERS = 65536 };/* Maximum number of meters. */
 enum { MAX_BANDS = 8 }; /* Maximum number of bands / meter. */
 enum { N_METER_LOCKS = 64 };/* Maximum number of meters. */
@@ -105,6 +104,9 @@ enum { N_METER_LOCKS = 64 };/* Maximum number of 
meters. */
 /* Protects against changes to 'dp_netdevs'. */
 static struct ovs_mutex dp_netdev_mutex = OVS_MUTEX_INITIALIZER;
 
+/* Maximum number of flows in flow table. */
+static atomic_uint32_t netdev_max_flow = ATOMIC_VAR_INIT(65536);
+
 /* Contains all 'struct dp_netdev's. */
 static struct shash dp_netdevs OVS_GUARDED_BY(dp_netdev_mutex)
 = SHASH_INITIALIZER(_netdevs);
@@ -1112,6 +1114,40 @@ pmd_info_show_perf(struct ds *reply,
 }
 }
 
+static void
+dpif_netdev_set_max_flow(struct unixctl_conn *conn,
+ int argc OVS_UNUSED,
+ const char *argv[],
+ void *aux OVS_UNUSED)
+{
+long long max_flow = atoll(argv[1]);
+
+if (max_flow <= 0 || max_flow >= UINT_MAX) {
+unixctl_command_reply_error(conn,
+"max-flow should: > 0 and < UINT_MAX\n");
+return;
+}
+
+atomic_store_relaxed(_max_flow, max_flow);
+unixctl_command_reply(conn, NULL);
+}
+
+static void
+dpif_netdev_show_max_flow(struct unixctl_conn *conn,
+  int argc OVS_UNUSED,
+  const char *argv[] OVS_UNUSED,
+  void *aux OVS_UNUSED)
+{
+struct ds reply = DS_EMPTY_INITIALIZER;
+uint32_t max_flow;
+
+atomic_read_relaxed(_max_flow, _flow);
+
+ds_put_format(,"%u\n", max_flow);
+unixctl_command_reply(conn, ds_cstr());
+ds_destroy();
+}
+
 static int
 compare_poll_list(const void *a_, const void *b_)
 {
@@ -1416,6 +1452,14 @@ dpif_netdev_init(void)
  "[-us usec] [-q qlen]",
  0, 10, pmd_perf_log_set_cmd,
  NULL);
+unixctl_command_register("dpif-netdev/pmd-set-max-flow",
+ "number",
+ 1, 1, dpif_netdev_set_max_flow,
+ NULL);
+unixctl_command_register("dpif-netdev/pmd-show-max-flow",
+ "",
+ 0, 0, dpif_netdev_show_max_flow,
+ NULL);
 return 0;
 }
 
@@ -3335,6 +3379,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 struct dpif_flow_stats *stats)
 {
 struct dp_netdev_flow *netdev_flow;
+uint32_t max_flow;
 int error = 0;
 
 if (stats) {
@@ -3345,7 +3390,8 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 netdev_flow = dp_netdev_pmd_lookup_flow(pmd, key, NULL);
 if (!netdev_flow) {
 if (put->flags & DPIF_FP_CREATE) {
-if (cmap_count(>flow_table) < MAX_FLOWS) {
+atomic_read_relaxed(_max_flow, _flow);
+if (cmap_count(>flow_table) < max_flow) {
 dp_netdev_flow_add(pmd, match, ufid, put->actions,
put->actions_len);
 error = 0;
-- 
2.23.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] compat: Include confirm_neigh parameter if needed

2020-01-08 Thread Simon Horman
On Tue, Jan 07, 2020 at 08:35:57AM -0800, Gregory Rose wrote:
> On 1/7/2020 1:34 AM, Simon Horman wrote:
> > On Mon, Jan 06, 2020 at 01:36:34PM -0800, Greg Rose wrote:

...

> > > --- a/datapath/linux/compat/ip6_gre.c
> > > +++ b/datapath/linux/compat/ip6_gre.c
> > > @@ -1089,7 +1089,11 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct 
> > > sk_buff *skb,
> > >   /* TooBig packet may have updated dst->dev's mtu */
> > >   if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)
> > > +#ifndef HAVE_DST_OPS_CONFIRM_NEIGH
> > >   dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu);
> > > +#else
> > > + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, false);
> > > +#endif
> > Did you consider using skb_dst_update_pmtu() unconditionally?
> > That may be cleaner going forwards.
> 
> Yes, but it defaults to true for the boolean parameter to confirm the
> neighbor entry
> and I didn't want to change behavior for older kernels.  If you think that's
> not a concern
> then I'd be happy to respin the patch using skb_dst_update_pmtu().

Thanks Greg,

after reviewing things another time I agree that what you have done is
correct.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev