[ovs-dev] [branch-2.7 2/2] Prepare for 2.7.4.

2017-09-25 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 NEWS | 4 
 configure.ac | 2 +-
 debian/changelog | 6 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/NEWS b/NEWS
index 2a8cc734ae4d..672e712bd75d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,7 @@
+v2.7.4 - xx xxx 
+-
+
+
 v2.7.3 - 26 Sep 2017
 -
- Bug fixes
diff --git a/configure.ac b/configure.ac
index 3f076f76fd0a..a90603ad558e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -13,7 +13,7 @@
 # limitations under the License.
 
 AC_PREREQ(2.63)
-AC_INIT(openvswitch, 2.7.3, b...@openvswitch.org)
+AC_INIT(openvswitch, 2.7.4, b...@openvswitch.org)
 AC_CONFIG_SRCDIR([datapath/datapath.c])
 AC_CONFIG_MACRO_DIR([m4])
 AC_CONFIG_AUX_DIR([build-aux])
diff --git a/debian/changelog b/debian/changelog
index 5506d7e98fc5..dfaafe848822 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+openvswitch (2.7.4-1) unstable; urgency=low
+   [ Open vSwitch team ]
+   * New upstream version
+
+ -- Open vSwitch team   Tue, 25 Sep 2017 00:12:13 -0700
+
 openvswitch (2.7.3-1) unstable; urgency=low
[ Open vSwitch team ]
* New upstream version
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [branch-2.7 1/2] Set release date for 2.7.3.

2017-09-25 Thread Justin Pettit
Signed-off-by: Justin Pettit 
---
 NEWS | 4 ++--
 debian/changelog | 3 +--
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/NEWS b/NEWS
index 3ceb55f4c623..2a8cc734ae4d 100644
--- a/NEWS
+++ b/NEWS
@@ -1,6 +1,6 @@
-v2.7.3 - xx xxx 
+v2.7.3 - 26 Sep 2017
 -
-
+   - Bug fixes
 
 v2.7.2 - 18 Jul 2017
 -
diff --git a/debian/changelog b/debian/changelog
index 1d0236faac15..5506d7e98fc5 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,9 +1,8 @@
 openvswitch (2.7.3-1) unstable; urgency=low
[ Open vSwitch team ]
* New upstream version
-   - Nothing yet!
 
- -- Open vSwitch team   Tue, 18 Jul 2017 16:28:43 -0700
+ -- Open vSwitch team   Tue, 25 Sep 2017 00:12:13 -0700
 
 openvswitch (2.7.2-1) unstable; urgency=low
[ Open vSwitch team ]
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 9/9] dpif-netdev: do hw flow offload in another thread

2017-09-25 Thread Yuanhan Liu
Currently, the major trigger for hw flow offload is at upcall handling,
which is actually in the datapath. Moreover, the hw offload installation
and modification is not that lightweight. Meaning, if there are so many
flows being added or modified frequently, it could stall the datapath,
which could result to packet loss.

To diminish that, all those flow operations will be recorded and appended
to a list. A thread is then introduced to process this list (to do the
real flow offloading put/del operations). This could leave the datapath
as lightweight as possible.

Signed-off-by: Yuanhan Liu 
---
 lib/dpif-netdev.c | 301 --
 1 file changed, 249 insertions(+), 52 deletions(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 13fd012..ef5c2e9 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1949,6 +1949,223 @@ dp_netdev_pmd_find_flow_by_mark(const uint32_t mark)
 return NULL;
 }
 
+struct dp_flow_offload_item {
+struct dp_netdev_pmd_thread *pmd;
+odp_port_t in_port;
+struct dp_netdev_flow *flow;
+ovs_u128 ufid;
+int op;
+struct match match;
+struct nlattr *actions;
+size_t actions_len;
+int rxq;
+uint32_t flow_mark;
+
+struct ovs_list node;
+};
+
+struct dp_flow_offload {
+struct ovs_mutex mutex;
+struct ovs_list list;
+pthread_cond_t cond;
+};
+
+static struct dp_flow_offload dp_flow_offload = {
+.mutex = OVS_MUTEX_INITIALIZER,
+.list  = OVS_LIST_INITIALIZER(&dp_flow_offload.list),
+};
+
+static struct ovsthread_once offload_thread_once
+= OVSTHREAD_ONCE_INITIALIZER;
+
+enum {
+DP_NETDEV_FLOW_OFFLOAD_OP_ADD,
+DP_NETDEV_FLOW_OFFLOAD_OP_MOD,
+DP_NETDEV_FLOW_OFFLOAD_OP_DEL,
+};
+
+static struct dp_flow_offload_item *
+dp_netdev_alloc_flow_offload(struct dp_netdev_pmd_thread *pmd,
+  struct dp_netdev_flow *flow,
+  odp_port_t in_port, const ovs_u128 *ufid,
+  int op)
+{
+struct dp_flow_offload_item *offload;
+
+offload = xzalloc(sizeof(*offload));
+offload->pmd = pmd;
+offload->flow = flow;
+offload->in_port = in_port;
+offload->ufid = *ufid;
+offload->op = op;
+
+ovs_refcount_ref(&pmd->ref_cnt);
+ovs_refcount_ref(&flow->ref_cnt);
+
+return offload;
+}
+
+static void
+dp_netdev_free_flow_offload(struct dp_flow_offload_item *offload)
+{
+ovs_refcount_unref(&offload->pmd->ref_cnt);
+ovs_refcount_unref(&offload->flow->ref_cnt);
+
+free(offload->actions);
+free(offload);
+}
+
+static void
+dp_netdev_append_flow_offload(struct dp_flow_offload_item *offload)
+{
+ovs_mutex_lock(&dp_flow_offload.mutex);
+ovs_list_push_back(&dp_flow_offload.list, &offload->node);
+ovs_mutex_unlock(&dp_flow_offload.mutex);
+
+pthread_cond_signal(&dp_flow_offload.cond);
+}
+
+static int
+dp_netdev_flow_offload_del(struct dp_flow_offload_item *offload)
+{
+struct dp_netdev_flow *flow = offload->flow;
+struct dp_netdev_port *port;
+int ret = -1;
+
+ovs_mutex_lock(&offload->pmd->flow_mutex);
+port = dp_netdev_lookup_port(offload->pmd->dp, offload->in_port);
+if (flow->has_mark && port) {
+ret = netdev_flow_del(port->netdev, &offload->ufid, NULL);
+}
+
+dp_netdev_remove_flow_mark_map(flow->mark);
+ovsrcu_quiesce_start();
+flow->has_mark = false;
+ovs_mutex_unlock(&offload->pmd->flow_mutex);
+
+return ret;
+}
+
+static int
+dp_netdev_flow_offload_put(struct dp_flow_offload_item *offload)
+{
+struct dp_netdev_port *port;
+struct dp_netdev_flow *flow = offload->flow;
+bool create = offload->op == DP_NETDEV_FLOW_OFFLOAD_OP_ADD;
+struct offload_info info;
+int ret;
+
+port = dp_netdev_lookup_port(offload->pmd->dp, offload->in_port);
+if (!port) {
+return -1;
+}
+
+if (create) {
+if (!dp_netdev_alloc_flow_mark(&info.flow_mark)) {
+VLOG_ERR("failed to allocate flow mark!\n");
+return -1;
+}
+offload->flow_mark = info.flow_mark;
+} else {
+info.flow_mark = offload->flow_mark;
+}
+info.rxq = offload->rxq;
+
+ret = netdev_flow_put(port->netdev, &offload->match,
+  offload->actions, offload->actions_len,
+  &offload->ufid, &info, NULL);
+if (ret) {
+if (create) {
+dp_netdev_remove_flow_mark_map(info.flow_mark);
+}
+return ret;
+}
+
+ovs_mutex_lock(&offload->pmd->flow_mutex);
+if (create) {
+flow->has_mark = true;
+flow->mark = info.flow_mark;
+if (!flow->dead) {
+/*
+ * A flow could have been dead after we regain the lock,
+ * while the flow has offloaded to the netdev. When that
+ * happens, there should be an offload item in the offload
+ * list for the flow removal. To make sure the flow

[ovs-dev] [PATCH v3 8/9] netdev-dpdk: add debug for rte flow patterns

2017-09-25 Thread Yuanhan Liu
The log level will be set to DBG when this patchset is close to
being merged.

Co-authored-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
---
 lib/netdev-dpdk.c | 177 ++
 1 file changed, 177 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 02c3677..80fd64b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3420,6 +3420,182 @@ struct flow_actions {
 };
 
 static void
+dump_flow_pattern(struct rte_flow_item *item)
+{
+if (item->type == RTE_FLOW_ITEM_TYPE_ETH) {
+const struct rte_flow_item_eth *eth_spec = item->spec;
+const struct rte_flow_item_eth *eth_mask = item->mask;
+
+VLOG_INFO("rte flow eth pattern:\n");
+if (eth_spec) {
+VLOG_INFO("spec: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
+ "type=0x%04" PRIx16"\n",
+ eth_spec->src.addr_bytes[0], eth_spec->src.addr_bytes[1],
+ eth_spec->src.addr_bytes[2], eth_spec->src.addr_bytes[3],
+ eth_spec->src.addr_bytes[4], eth_spec->src.addr_bytes[5],
+ eth_spec->dst.addr_bytes[0], eth_spec->dst.addr_bytes[1],
+ eth_spec->dst.addr_bytes[2], eth_spec->dst.addr_bytes[3],
+ eth_spec->dst.addr_bytes[4], eth_spec->dst.addr_bytes[5],
+ ntohs(eth_spec->type));
+} else {
+VLOG_INFO("spec = null\n");
+}
+if (eth_mask) {
+VLOG_INFO("mask: src="ETH_ADDR_FMT", dst="ETH_ADDR_FMT", "
+ "type=0x%04"PRIx16"\n",
+ eth_mask->src.addr_bytes[0], eth_mask->src.addr_bytes[1],
+ eth_mask->src.addr_bytes[2], eth_mask->src.addr_bytes[3],
+ eth_mask->src.addr_bytes[4], eth_mask->src.addr_bytes[5],
+ eth_mask->dst.addr_bytes[0], eth_mask->dst.addr_bytes[1],
+ eth_mask->dst.addr_bytes[2], eth_mask->dst.addr_bytes[3],
+ eth_mask->dst.addr_bytes[4], eth_mask->dst.addr_bytes[5],
+ eth_mask->type);
+} else {
+VLOG_INFO("mask = null\n");
+}
+}
+
+if (item->type == RTE_FLOW_ITEM_TYPE_VLAN) {
+const struct rte_flow_item_vlan *vlan_spec = item->spec;
+const struct rte_flow_item_vlan *vlan_mask = item->mask;
+
+VLOG_INFO("rte flow vlan pattern:\n");
+if (vlan_spec) {
+VLOG_INFO("spec: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
+ ntohs(vlan_spec->tpid), ntohs(vlan_spec->tci));
+} else {
+VLOG_INFO("spec = null\n");
+}
+
+if (vlan_mask) {
+VLOG_INFO("mask: tpid=0x%"PRIx16", tci=0x%"PRIx16"\n",
+ vlan_mask->tpid, vlan_mask->tci);
+} else {
+VLOG_INFO("mask = null\n");
+}
+}
+
+if (item->type == RTE_FLOW_ITEM_TYPE_IPV4) {
+const struct rte_flow_item_ipv4 *ipv4_spec = item->spec;
+const struct rte_flow_item_ipv4 *ipv4_mask = item->mask;
+
+VLOG_INFO("rte flow ipv4 pattern:\n");
+if (ipv4_spec) {
+VLOG_INFO("spec: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8
+ ", src="IP_FMT", dst="IP_FMT"\n",
+ ipv4_spec->hdr.type_of_service,
+ ipv4_spec->hdr.time_to_live,
+ ipv4_spec->hdr.next_proto_id,
+ IP_ARGS(ipv4_spec->hdr.src_addr),
+ IP_ARGS(ipv4_spec->hdr.dst_addr));
+} else {
+VLOG_INFO("spec = null\n");
+}
+if (ipv4_mask) {
+VLOG_INFO("mask: tos=0x%"PRIx8", ttl=%"PRIx8", proto=0x%"PRIx8
+ ", src="IP_FMT", dst="IP_FMT"\n",
+ ipv4_mask->hdr.type_of_service,
+ ipv4_mask->hdr.time_to_live,
+ ipv4_mask->hdr.next_proto_id,
+ IP_ARGS(ipv4_mask->hdr.src_addr),
+ IP_ARGS(ipv4_mask->hdr.dst_addr));
+} else {
+VLOG_INFO("mask = null\n");
+}
+}
+
+if (item->type == RTE_FLOW_ITEM_TYPE_UDP) {
+const struct rte_flow_item_udp *udp_spec = item->spec;
+const struct rte_flow_item_udp *udp_mask = item->mask;
+
+VLOG_INFO("rte flow udp pattern:\n");
+if (udp_spec) {
+VLOG_INFO("spec: src_port=%"PRIu16", dst_port=%"PRIu16"\n",
+ ntohs(udp_spec->hdr.src_port),
+ ntohs(udp_spec->hdr.dst_port));
+} else {
+VLOG_INFO("spec = null\n");
+}
+if (udp_mask) {
+VLOG_INFO("mask: src_port=0x%"PRIx16", dst_port=0x%"PRIx16"\n",
+ udp_mask->hdr.src_port,
+ udp_mask->hdr.dst_port);
+} else {
+VLOG_INFO("mask = null\n");
+}
+}
+
+if (item->type == RTE_FLOW_ITEM_TYPE_SCT

[ovs-dev] [PATCH v3 7/9] netdev-dpdk: remove offloaded flow on deletion

2017-09-25 Thread Yuanhan Liu
Inovke netdev class '->flow_del' method on flow deletion. The dpdk netdev
implementation will then remove the rte flow associated with the ufid.

Co-authored-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
---

v2: - check the returned "port" from dp_netdev_lookup_port
- error log when flow is not found
---
 lib/dpif-netdev.c |  6 ++
 lib/netdev-dpdk.c | 19 ++-
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 1a70af6..13fd012 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1964,6 +1964,12 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread 
*pmd,
 dpcls_remove(cls, &flow->cr);
 cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
 if (flow->has_mark) {
+struct dp_netdev_port *port;
+
+port = dp_netdev_lookup_port(pmd->dp, in_port);
+if (port) {
+netdev_flow_del(port->netdev, &flow->ufid, NULL);
+}
 dp_netdev_remove_flow_mark_map(flow->mark);
 flow->has_mark = false;
 }
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 3e2b96b..02c3677 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3881,6 +3881,23 @@ netdev_dpdk_flow_put(struct netdev *netdev, struct match 
*match,
 actions_len, ufid, info);
 }
 
+static int
+netdev_dpdk_flow_del(struct netdev *netdev, const ovs_u128 *ufid,
+ struct dpif_flow_stats *stats OVS_UNUSED)
+{
+
+struct rte_flow *rte_flow = get_rte_flow_by_ufid(ufid);
+
+if (!rte_flow) {
+VLOG_ERR("failed to find flow associated with ufid " UUID_FMT "\n",
+ UUID_ARGS((struct uuid *)ufid));
+return -1;
+}
+
+return netdev_dpdk_destroy_rte_flow(netdev_dpdk_cast(netdev),
+ufid, rte_flow);
+}
+
 #define DPDK_FLOW_OFFLOAD_API \
 NULL,   /* flow_flush */  \
 NULL,   /* flow_dump_create */\
@@ -3888,7 +3905,7 @@ netdev_dpdk_flow_put(struct netdev *netdev, struct match 
*match,
 NULL,   /* flow_dump_next */  \
 netdev_dpdk_flow_put, \
 NULL,   /* flow_get */\
-NULL,   /* flow_del */\
+netdev_dpdk_flow_del, \
 NULL/* init_flow_api */
 
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 6/9] netdev-dpdk: retry with queue action

2017-09-25 Thread Yuanhan Liu
From: Finn Christensen 

AFAIK, most (if not all) NICs (including Mellanox and Intel) do not
support a pure MARK action.  It's required to be used together with
some other actions, like QUEUE.

To workaround it, retry with a queue action when first try failed.

Moreover, some Intel's NIC (say XL710) needs the QUEUE action set
before the MARK action.

Co-authored-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
---

v3: - retry when 2 specific errors are met

v2: - workarounded the queue action issue, which sets the queue index
  to 0 blindly.
- added some comments regarding to the retry with queue action
---
 lib/netdev-dpdk.c | 63 ---
 1 file changed, 56 insertions(+), 7 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 525536a..3e2b96b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3324,6 +3324,7 @@ static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
 struct ufid_dpdk_flow_data {
 struct hmap_node node;
 ovs_u128 ufid;
+int rxq;
 struct rte_flow *rte_flow;
 };
 
@@ -3369,7 +3370,8 @@ del_ufid_dpdk_flow_mapping(const ovs_u128 *ufid)
 
 /* Add ufid to dpdk_flow mapping */
 static inline void
-add_ufid_dpdk_flow_mapping(const ovs_u128 *ufid, struct rte_flow *rte_flow)
+add_ufid_dpdk_flow_mapping(const ovs_u128 *ufid, struct rte_flow *rte_flow,
+   int rxq)
 {
 size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
 struct ufid_dpdk_flow_data *data = xzalloc(sizeof(*data));
@@ -3384,6 +3386,7 @@ add_ufid_dpdk_flow_mapping(const ovs_u128 *ufid, struct 
rte_flow *rte_flow)
 
 data->ufid = *ufid;
 data->rte_flow = rte_flow;
+data->rxq = rxq;
 
 ovs_mutex_lock(&ufid_lock);
 hmap_insert(&ufid_dpdk_flow, &data->node, hash);
@@ -3665,15 +3668,55 @@ netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
 add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
 add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
 
+bool tried = 0;
+struct rte_flow_action_queue queue;
+again:
 flow = rte_flow_create(dev->port_id, &flow_attr, patterns.items,
actions.actions, &error);
-if (!flow) {
+if (!flow && !tried &&
+(error.type == RTE_FLOW_ERROR_TYPE_ACTION ||
+ error.type == RTE_FLOW_ERROR_TYPE_HANDLE)) {
+/*
+ * Seems most (if not all) NICs do not support a pure MARK
+ * action. It's required to be used together with some other
+ * actions, such as QUEUE.
+ *
+ * Thus, if flow creation fails (more precisely, if above 2
+ * errors are met), here we try again with QUEUE action.
+ *
+ * The reason why there are 2 error codes is DPDK drivers are
+ * not quite consistent on error report for this case.
+ */
+if (info->rxq < 0 || info->rxq >= netdev->n_rxq) {
+VLOG_ERR("invalid queue index: %d\n", info->rxq);
+ret = -1;
+goto out;
+}
+queue.index = info->rxq;
+
+/* re-build the action */
+actions.cnt = 0;
+/*
+ * NOTE: Intel PMD driver needs the QUEUE action set before
+ * the MARK action.
+ */
+add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_QUEUE, &queue);
+add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_MARK, &mark);
+add_flow_action(&actions, RTE_FLOW_ACTION_TYPE_END, NULL);
+
+VLOG_INFO("failed to create rte flow, "
+  "try again with QUEUE action (queue index = %d)\n",
+  info->rxq);
+tried = true;
+
+goto again;
+} else if (!flow) {
 VLOG_ERR("rte flow creat error: %u : message : %s\n",
  error.type, error.message);
 ret = -1;
 goto out;
 }
-add_ufid_dpdk_flow_mapping(ufid, flow);
+add_ufid_dpdk_flow_mapping(ufid, flow, info->rxq);
 VLOG_INFO("installed flow %p by ufid "UUID_FMT"\n",
   flow, UUID_ARGS((struct uuid *)ufid));
 
@@ -3807,17 +3850,23 @@ netdev_dpdk_flow_put(struct netdev *netdev, struct 
match *match,
  const ovs_u128 *ufid, struct offload_info *info,
  struct dpif_flow_stats *stats OVS_UNUSED)
 {
-struct rte_flow *rte_flow;
+struct ufid_dpdk_flow_data *flow_data;
 int ret;
 
 /*
  * If an old rte_flow exists, it means it's a flow modification.
  * Here destroy the old rte flow first before adding a new one.
  */
-rte_flow = get_rte_flow_by_ufid(ufid);
-if (rte_flow) {
+flow_data = find_ufid_dpdk_flow_mapping(ufid);
+if (flow_data && flow_data->rte_flow) {
+/*
+ * there is no rxq given for flow modification. Instead, we
+ * retrieve it from the rxq firstly registered here (by flow
+ * add operation).
+ */
+info->rxq = flow_data->rxq;
 ret = netdev_dpdk_destroy_rte_f

[ovs-dev] [PATCH v3 5/9] dpif-netdev: record rx queue id for the upcall

2017-09-25 Thread Yuanhan Liu
From: Shachar Beiser 

For the DPDK flow offload, which basically just binds a MARK action to
a flow, the MARK is required to be used together with a QUEUE action for
the most NICs I'm aware of. The QUEUE action then needs a queue index,
which is not given in the flow content.

Here we record the rx queue while recieving the pkts to solve above issue.

Co-authored-by: Yuanhan Liu 
Signed-off-by: Shachar Beiser 
Signed-off-by: Yuanhan Liu 
---
 lib/dp-packet.h   |  1 +
 lib/dpif-netdev.c | 19 +++
 lib/netdev.c  |  1 +
 lib/netdev.h  |  1 +
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index a7a062f..479a734 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -709,6 +709,7 @@ enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets 
in a batch. */
 struct dp_packet_batch {
 size_t count;
 bool trunc; /* true if the batch needs truncate. */
+int rxq;
 struct dp_packet *packets[NETDEV_MAX_BURST];
 };
 
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 5c33c74..1a70af6 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -2550,7 +2550,7 @@ static void
 try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port,
 struct dp_netdev_flow *flow, struct match *match,
 const ovs_u128 *ufid, const struct nlattr *actions,
-size_t actions_len)
+size_t actions_len, int rxq)
 {
 struct offload_info info;
 struct dp_netdev_port *port;
@@ -2562,6 +2562,7 @@ try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, 
odp_port_t in_port,
 if (!port) {
 return;
 }
+info.rxq = rxq;
 
 if (modification) {
 info.flow_mark = flow->mark;
@@ -2598,7 +2599,8 @@ try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, 
odp_port_t in_port,
 static struct dp_netdev_flow *
 dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
struct match *match, const ovs_u128 *ufid,
-   const struct nlattr *actions, size_t actions_len)
+   const struct nlattr *actions, size_t actions_len,
+   int rxq)
 OVS_REQUIRES(pmd->flow_mutex)
 {
 struct dp_netdev_flow *flow;
@@ -2647,7 +2649,7 @@ dp_netdev_flow_add(struct dp_netdev_pmd_thread *pmd,
 dp_netdev_flow_hash(&flow->ufid));
 
 try_netdev_flow_put(pmd, in_port, flow, match, ufid,
-actions, actions_len);
+actions, actions_len, rxq);
 
 if (OVS_UNLIKELY(!VLOG_DROP_DBG((&upcall_rl {
 struct ds ds = DS_EMPTY_INITIALIZER;
@@ -2717,7 +2719,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 if (put->flags & DPIF_FP_CREATE) {
 if (cmap_count(&pmd->flow_table) < MAX_FLOWS) {
 dp_netdev_flow_add(pmd, match, ufid, put->actions,
-   put->actions_len);
+   put->actions_len, -1);
 error = 0;
 } else {
 error = EFBIG;
@@ -2738,7 +2740,7 @@ flow_put_on_pmd(struct dp_netdev_pmd_thread *pmd,
 ovsrcu_set(&netdev_flow->actions, new_actions);
 
 try_netdev_flow_put(pmd, in_port, netdev_flow, match, ufid,
-put->actions, put->actions_len);
+put->actions, put->actions_len, -1);
 
 if (stats) {
 get_dpif_flow_stats(netdev_flow, stats);
@@ -5119,7 +5121,7 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
  struct dp_packet *packet,
  const struct netdev_flow_key *key,
  struct ofpbuf *actions, struct ofpbuf *put_actions,
- int *lost_cnt, long long now)
+ int *lost_cnt, long long now, int rxq)
 {
 struct ofpbuf *add_actions;
 struct dp_packet_batch b;
@@ -5175,7 +5177,8 @@ handle_packet_upcall(struct dp_netdev_pmd_thread *pmd,
 if (OVS_LIKELY(!netdev_flow)) {
 netdev_flow = dp_netdev_flow_add(pmd, &match, &ufid,
  add_actions->data,
- add_actions->size);
+ add_actions->size,
+ rxq);
 }
 ovs_mutex_unlock(&pmd->flow_mutex);
 emc_probabilistic_insert(pmd, key, netdev_flow);
@@ -5245,7 +5248,7 @@ fast_path_processing(struct dp_netdev_pmd_thread *pmd,
 
 miss_cnt++;
 handle_packet_upcall(pmd, packets[i], &keys[i], &actions,
- &put_actions, &lost_cnt, now);
+ &put_actions, &lost_cnt, now, packets_->rxq);
 }
 
 ofpbuf_uninit(&actions);
diff --git a/lib/netdev.c b/lib/netdev.c
index b4e570b..c9b7019 100644
--- a/lib/netdev.c
+++ b/lib/netdev.c
@@ -700,6 +700,7 @

[ovs-dev] [PATCH v3 4/9] netdev-dpdk: implement flow put with rte flow

2017-09-25 Thread Yuanhan Liu
From: Finn Christensen 

The basic yet the major part of this patch is to translate the "match"
to rte flow patterns. And then, we create a rte flow with a MARK action.
Afterwards, all pkts matches the flow will have the mark id in the mbuf.

For any unsupported flows, such as MPLS, -1 is returned, meaning the
flow offload is failed and then skipped.

Co-authored-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
---

v3: - fix duplicate (and wrong) TOS assignment
- zero the pattern spec as well
- remove macros
- add missing unsupported fields

v2: - convert some macros to functions
- do not hardcode the max number of flow/action
- fix L2 patterns for Intel nic
- add comments for not implemented offload methods
---
 lib/netdev-dpdk.c | 441 +-
 1 file changed, 440 insertions(+), 1 deletion(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 1be9131..525536a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -3404,6 +3404,445 @@ get_rte_flow_by_ufid(const ovs_u128 *ufid)
 }
 
 
+struct flow_patterns {
+struct rte_flow_item *items;
+int cnt;
+int max;
+};
+
+struct flow_actions {
+struct rte_flow_action *actions;
+int cnt;
+int max;
+};
+
+static void
+add_flow_pattern(struct flow_patterns *patterns, enum rte_flow_item_type type,
+ const void *spec, const void *mask)
+{
+int cnt = patterns->cnt;
+
+if (cnt == 0) {
+patterns->max = 8;
+patterns->items = xcalloc(patterns->max, sizeof(struct rte_flow_item));
+} else if (cnt == patterns->max) {
+patterns->max *= 2;
+patterns->items = xrealloc(patterns->items, patterns->max *
+   sizeof(struct rte_flow_item));
+}
+
+patterns->items[cnt].type = type;
+patterns->items[cnt].spec = spec;
+patterns->items[cnt].mask = mask;
+patterns->items[cnt].last = NULL;
+patterns->cnt++;
+}
+
+static void
+add_flow_action(struct flow_actions *actions, enum rte_flow_action_type type,
+const void *conf)
+{
+int cnt = actions->cnt;
+
+if (cnt == 0) {
+actions->max = 8;
+actions->actions = xcalloc(actions->max,
+   sizeof(struct rte_flow_action));
+} else if (cnt == actions->max) {
+actions->max *= 2;
+actions->actions = xrealloc(actions->actions, actions->max *
+sizeof(struct rte_flow_action));
+}
+
+actions->actions[cnt].type = type;
+actions->actions[cnt].conf = conf;
+actions->cnt++;
+}
+
+static int
+netdev_dpdk_add_rte_flow_offload(struct netdev *netdev,
+ const struct match *match,
+ struct nlattr *nl_actions OVS_UNUSED,
+ size_t actions_len OVS_UNUSED,
+ const ovs_u128 *ufid,
+ struct offload_info *info)
+{
+struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
+const struct rte_flow_attr flow_attr = {
+.group = 0,
+.priority = 0,
+.ingress = 1,
+.egress = 0
+};
+struct flow_patterns patterns = { .items = NULL, .cnt = 0 };
+struct flow_actions actions = { .actions = NULL, .cnt = 0 };
+struct rte_flow *flow;
+struct rte_flow_error error;
+uint8_t *ipv4_next_proto_mask = NULL;
+int ret = 0;
+
+/* Eth */
+struct rte_flow_item_eth eth_spec;
+struct rte_flow_item_eth eth_mask;
+memset(ð_spec, 0, sizeof(eth_spec));
+memset(ð_mask, 0, sizeof(eth_mask));
+if (!eth_addr_is_zero(match->wc.masks.dl_src) ||
+!eth_addr_is_zero(match->wc.masks.dl_dst)) {
+rte_memcpy(ð_spec.dst, &match->flow.dl_dst, sizeof(eth_spec.dst));
+rte_memcpy(ð_spec.src, &match->flow.dl_src, sizeof(eth_spec.src));
+eth_spec.type = match->flow.dl_type;
+
+rte_memcpy(ð_mask.dst, &match->wc.masks.dl_dst,
+   sizeof(eth_mask.dst));
+rte_memcpy(ð_mask.src, &match->wc.masks.dl_src,
+   sizeof(eth_mask.src));
+eth_mask.type = match->wc.masks.dl_type;
+
+add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH,
+ ð_spec, ð_mask);
+} else {
+/*
+ * If user specifies a flow (like UDP flow) without L2 patterns,
+ * OVS will at least set the dl_type. Normally, it's enough to
+ * create an eth pattern just with it. Unluckily, some Intel's
+ * NIC (such as XL710) doesn't support that. Below is a workaround,
+ * which simply matches any L2 pkts.
+ */
+add_flow_pattern(&patterns, RTE_FLOW_ITEM_TYPE_ETH, NULL, NULL);
+}
+
+/* VLAN */
+struct rte_flow_item_vlan vlan_spec;
+struct rte_flow_item_vlan vlan_mask;
+memset(&vlan_spec, 0, sizeof(vlan_spec));
+memset(&vlan_mask, 0, sizeof(vlan_mask));

[ovs-dev] [PATCH v3 3/9] netdev-dpdk: convert ufid to dpdk flow

2017-09-25 Thread Yuanhan Liu
Flows offloaded to DPDK are identified by rte_flow pointer while OVS
flows are identified by ufid. This patches adds a hmap to convert ufid
to dpdk flow (rte_flow).

Most of the code are stolen from netdev-tc-offloads.c, with some
modificatons.

Some functions are marked as "inline", which is a trick to workaround
the temp "functiond defined but not used" warnings.

Co-authored-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
---

v3: warn on failed to find ufi_data_flow_data now assocociated with ufid
---
 lib/netdev-dpdk.c | 92 +++
 1 file changed, 92 insertions(+)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f58e9be..1be9131 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "dirs.h"
 #include "dp-packet.h"
@@ -58,6 +59,7 @@
 #include "sset.h"
 #include "unaligned.h"
 #include "timeval.h"
+#include "uuid.h"
 #include "unixctl.h"
 
 enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM};
@@ -3312,6 +3314,96 @@ unlock:
 return err;
 }
 
+/*
+ * A mapping from ufid to dpdk rte_flow pointer
+ */
+
+static struct hmap ufid_dpdk_flow = HMAP_INITIALIZER(&ufid_dpdk_flow);
+static struct ovs_mutex ufid_lock = OVS_MUTEX_INITIALIZER;
+
+struct ufid_dpdk_flow_data {
+struct hmap_node node;
+ovs_u128 ufid;
+struct rte_flow *rte_flow;
+};
+
+/*
+ * Find ufid_dpdk_flow_data node associated with @ufid
+ */
+static struct ufid_dpdk_flow_data *
+find_ufid_dpdk_flow_mapping(const ovs_u128 *ufid)
+{
+size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
+struct ufid_dpdk_flow_data *data = NULL;
+
+ovs_mutex_lock(&ufid_lock);
+HMAP_FOR_EACH_WITH_HASH (data, node, hash, &ufid_dpdk_flow) {
+if (ovs_u128_equals(*ufid, data->ufid)) {
+break;
+}
+}
+ovs_mutex_unlock(&ufid_lock);
+
+return data;
+}
+
+/*
+ * Remove ufid_dpdk_flow_data node associated with @ufid
+ */
+static inline void
+del_ufid_dpdk_flow_mapping(const ovs_u128 *ufid)
+{
+struct ufid_dpdk_flow_data *data;
+
+data = find_ufid_dpdk_flow_mapping(ufid);
+if (data) {
+ovs_mutex_lock(&ufid_lock);
+hmap_remove(&ufid_dpdk_flow, &data->node);
+free(data);
+ovs_mutex_unlock(&ufid_lock);
+} else {
+VLOG_WARN("ufid "UUID_FMT"is not installed in the dpdk flow mapping\n",
+  UUID_ARGS((struct uuid *)ufid));
+}
+}
+
+/* Add ufid to dpdk_flow mapping */
+static inline void
+add_ufid_dpdk_flow_mapping(const ovs_u128 *ufid, struct rte_flow *rte_flow)
+{
+size_t hash = hash_bytes(ufid, sizeof(*ufid), 0);
+struct ufid_dpdk_flow_data *data = xzalloc(sizeof(*data));
+
+/*
+ * We should not simply overwrite an existing rte flow.
+ * We should have deleted it first before re-adding it.
+ * Thus, if following assert triggers, something is wrong:
+ * the rte_flow is not destroyed.
+ */
+ovs_assert(find_ufid_dpdk_flow_mapping(ufid) == NULL);
+
+data->ufid = *ufid;
+data->rte_flow = rte_flow;
+
+ovs_mutex_lock(&ufid_lock);
+hmap_insert(&ufid_dpdk_flow, &data->node, hash);
+ovs_mutex_unlock(&ufid_lock);
+}
+
+/* Get rte_flow by ufid.
+ *
+ * Returns rte rte_flow if successful. Otherwise returns 0.
+ */
+static inline struct rte_flow *
+get_rte_flow_by_ufid(const ovs_u128 *ufid)
+{
+struct ufid_dpdk_flow_data *data;
+
+data = find_ufid_dpdk_flow_mapping(ufid);
+return data ? data->rte_flow : NULL;
+}
+
+
 #define NETDEV_DPDK_CLASS(NAME, INIT, CONSTRUCT, DESTRUCT,\
   SET_CONFIG, SET_TX_MULTIQ, SEND,\
   GET_CARRIER, GET_STATS, \
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 1/9] dpif-netdev: associate flow with a mark id

2017-09-25 Thread Yuanhan Liu
This patch associate a flow with a mark id (a uint32_t number) by a
direct array. The array is initiated with 1024 slots at the begninning,
and it will be enlarged when it's not big enough.  The mark id is
allocated with the help of id-pool.

It re-uses the flow API (more precisely, the ->flow_put method) to setup
the hw flow. The flow_put implementation then is supposed to create a
flow with MARK action for current stage, which is, only partial offload
is supported.

Co-authored-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
---
 lib/dpif-netdev.c | 140 ++
 lib/netdev.h  |   6 +++
 2 files changed, 146 insertions(+)

diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index 071ec14..f417b25 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -453,6 +453,9 @@ struct dp_netdev_flow {
 struct ovs_refcount ref_cnt;
 
 bool dead;
+bool has_mark;   /* A flag to tell whether this flow has a
+valid mark asscoiated with it. */
+uint32_t mark;   /* Unique flow mark assiged to a flow */
 
 /* Statistics. */
 struct dp_netdev_flow_stats stats;
@@ -1833,6 +1836,82 @@ dp_netdev_pmd_find_dpcls(struct dp_netdev_pmd_thread 
*pmd,
 return cls;
 }
 
+/*
+ * An array to map from the flow mark (the index) to dp netdev flow.
+ * Initial size is set to 1024, it will be resized when it's not
+ * big enough.
+ */
+struct flow_mark_map {
+size_t max;
+struct ovs_mutex mutex;
+struct id_pool *mark_pool;
+struct dp_netdev_flow **flows;
+};
+
+static struct flow_mark_map flow_mark_map = {
+.max = 1024,
+.mutex = OVS_MUTEX_INITIALIZER,
+};
+
+static bool
+dp_netdev_alloc_flow_mark(uint32_t *mark)
+{
+bool succeed = true;
+
+ovs_mutex_lock(&flow_mark_map.mutex);
+
+/* Haven't initiated yet, do it here */
+if (!flow_mark_map.flows) {
+flow_mark_map.flows = xzalloc(sizeof(struct dp_netdev_flow *) *
+  flow_mark_map.max);
+flow_mark_map.mark_pool = id_pool_create(0, UINT32_MAX / 2);
+}
+
+do {
+if (!id_pool_alloc_id(flow_mark_map.mark_pool, mark)) {
+succeed = false;
+break;
+}
+
+if (*mark >= flow_mark_map.max) {
+flow_mark_map.flows = xrealloc(flow_mark_map.flows,
+   flow_mark_map.max * 2 *
+   sizeof(struct dp_netdev_flow *));
+memset(&flow_mark_map.flows[flow_mark_map.max], 0,
+   flow_mark_map.max * sizeof(struct dp_netdev_flow *));
+flow_mark_map.max *= 2;
+
+break;
+}
+} while (flow_mark_map.flows[*mark]);
+
+ovs_mutex_unlock(&flow_mark_map.mutex);
+
+return succeed;
+}
+
+static void
+dp_netdev_install_flow_mark_map(const uint32_t mark,
+struct dp_netdev_flow *netdev_flow)
+{
+ovs_mutex_lock(&flow_mark_map.mutex);
+if (mark < flow_mark_map.max) {
+flow_mark_map.flows[mark] = netdev_flow;
+}
+ovs_mutex_unlock(&flow_mark_map.mutex);
+}
+
+static void
+dp_netdev_remove_flow_mark_map(const uint32_t mark)
+{
+ovs_mutex_lock(&flow_mark_map.mutex);
+id_pool_free_id(flow_mark_map.mark_pool, mark);
+if (mark < flow_mark_map.max) {
+flow_mark_map.flows[mark] = NULL;
+}
+ovs_mutex_unlock(&flow_mark_map.mutex);
+}
+
 static void
 dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread *pmd,
   struct dp_netdev_flow *flow)
@@ -1846,6 +1925,10 @@ dp_netdev_pmd_remove_flow(struct dp_netdev_pmd_thread 
*pmd,
 ovs_assert(cls != NULL);
 dpcls_remove(cls, &flow->cr);
 cmap_remove(&pmd->flow_table, node, dp_netdev_flow_hash(&flow->ufid));
+if (flow->has_mark) {
+dp_netdev_remove_flow_mark_map(flow->mark);
+flow->has_mark = false;
+}
 flow->dead = true;
 
 dp_netdev_flow_unref(flow);
@@ -2425,6 +2508,55 @@ out:
 return error;
 }
 
+static void
+try_netdev_flow_put(struct dp_netdev_pmd_thread *pmd, odp_port_t in_port,
+struct dp_netdev_flow *flow, struct match *match,
+const ovs_u128 *ufid, const struct nlattr *actions,
+size_t actions_len)
+{
+struct offload_info info;
+struct dp_netdev_port *port;
+bool modification = flow->has_mark;
+const char *op = modification ? "modify" : "install";
+int ret;
+
+port = dp_netdev_lookup_port(pmd->dp, in_port);
+if (!port) {
+return;
+}
+
+if (modification) {
+info.flow_mark = flow->mark;
+} else {
+if (!netdev_is_flow_api_enabled()) {
+return;
+}
+
+if (!dp_netdev_alloc_flow_mark(&info.flow_mark)) {
+VLOG_ERR("failed to allocate flow mark!\n");
+return;
+}
+}
+
+ret = netdev_flow_put(p

[ovs-dev] [PATCH v3 2/9] dpif-netdev: retrieve flow directly from the flow mark

2017-09-25 Thread Yuanhan Liu
So that we could skip the heavy emc processing, notably,
the miniflow_extract function. A simple PHY-PHY forwarding
testing (with one core, one queue and one flow) shows about
70% performance improvement.

The retrievement is done at datapath, which should be light.
Unfortunately, the pthread lock is too heavy for that. Instead,
RCU is chosen.

The major race issue is that the flows array might have been
resized, aka, the address might have been changed. For example,
assume the size of the flows array is 4. And then it's resized
to 8. Then a thread might reference the 6th item of the old
array, which is wrong.

RCU firstly makes sure the flows address update is atomic. Also
it provides an barrier. Setting the new array size after the
ovsrcu_set() makes sure above issue will not happen.

Note that though the heavy miniflow_extract is skipped, we
still have to do per packet checking, due to we have to check
the tcp_flags.

Co-authored-by: Finn Christensen 
Signed-off-by: Yuanhan Liu 
Signed-off-by: Finn Christensen 
---

v3: - replace CMAP with array, which futhure improves the
  performance from 50% to 70%

- introduce some common help functions for parse_tcp_flags

v2: update tcp_flags, which also fixes the build warnings
---
 lib/dp-packet.h   |  13 +
 lib/dpif-netdev.c |  91 +---
 lib/flow.c| 155 +++---
 lib/flow.h|   1 +
 4 files changed, 209 insertions(+), 51 deletions(-)

diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 046f3ab..a7a062f 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -691,6 +691,19 @@ reset_dp_packet_checksum_ol_flags(struct dp_packet *p)
 #define reset_dp_packet_checksum_ol_flags(arg)
 #endif
 
+static inline bool
+dp_packet_has_flow_mark(struct dp_packet *p OVS_UNUSED,
+uint32_t *mark OVS_UNUSED)
+{
+#ifdef DPDK_NETDEV
+if (p->mbuf.ol_flags & PKT_RX_FDIR_ID) {
+*mark = p->mbuf.hash.fdir.hi;
+return true;
+}
+#endif
+return false;
+}
+
 enum { NETDEV_MAX_BURST = 32 }; /* Maximum number packets in a batch. */
 
 struct dp_packet_batch {
diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
index f417b25..5c33c74 100644
--- a/lib/dpif-netdev.c
+++ b/lib/dpif-netdev.c
@@ -1845,7 +1845,7 @@ struct flow_mark_map {
 size_t max;
 struct ovs_mutex mutex;
 struct id_pool *mark_pool;
-struct dp_netdev_flow **flows;
+OVSRCU_TYPE(struct dp_netdev_flow **) flows;
 };
 
 static struct flow_mark_map flow_mark_map = {
@@ -1857,16 +1857,20 @@ static bool
 dp_netdev_alloc_flow_mark(uint32_t *mark)
 {
 bool succeed = true;
+struct dp_netdev_flow **flows;
 
 ovs_mutex_lock(&flow_mark_map.mutex);
 
 /* Haven't initiated yet, do it here */
-if (!flow_mark_map.flows) {
-flow_mark_map.flows = xzalloc(sizeof(struct dp_netdev_flow *) *
-  flow_mark_map.max);
+if (!flow_mark_map.mark_pool) {
 flow_mark_map.mark_pool = id_pool_create(0, UINT32_MAX / 2);
+
+flows = xzalloc(sizeof(struct dp_netdev_flow *) * flow_mark_map.max);
+ovsrcu_set(&flow_mark_map.flows, flows);
 }
 
+flows = ovsrcu_get_protected(struct dp_netdev_flow **,
+ &flow_mark_map.flows);
 do {
 if (!id_pool_alloc_id(flow_mark_map.mark_pool, mark)) {
 succeed = false;
@@ -1874,16 +1878,28 @@ dp_netdev_alloc_flow_mark(uint32_t *mark)
 }
 
 if (*mark >= flow_mark_map.max) {
-flow_mark_map.flows = xrealloc(flow_mark_map.flows,
-   flow_mark_map.max * 2 *
-   sizeof(struct dp_netdev_flow *));
-memset(&flow_mark_map.flows[flow_mark_map.max], 0,
-   flow_mark_map.max * sizeof(struct dp_netdev_flow *));
+struct dp_netdev_flow **new_flows;
+
+new_flows = xmalloc(2 * flow_mark_map.max *
+sizeof(struct dp_netdev_flow *));
+memcpy(new_flows, flows, sizeof(struct dp_netdev_flow *) *
+ flow_mark_map.max);
+memset(&new_flows[flow_mark_map.max], 0,
+   sizeof(struct dp_netdev_flow *) * flow_mark_map.max);
+ovsrcu_set(&flow_mark_map.flows, new_flows);
+/*
+ * Set the new size after above ovsrcu_set, which will make
+ * sure the thread still referencing the old array will not
+ * go beyond the old max.
+ */
 flow_mark_map.max *= 2;
 
+ovsrcu_synchronize();
+free(flows);
+flows = new_flows;
 break;
 }
-} while (flow_mark_map.flows[*mark]);
+} while (flows[*mark]);
 
 ovs_mutex_unlock(&flow_mark_map.mutex);
 
@@ -1894,9 +1910,13 @@ static void
 dp_netdev_install_flow_mark_map(const uint32_t mark,
   

[ovs-dev] [PATCH v3 0/9] OVS-DPDK flow offload with rte_flow

2017-09-25 Thread Yuanhan Liu
Some hightlights in v3
==

Here is the v3, with 2 major changes and further testings (including
many flows). This took more effort than I thought, thus v3 publication
has been delayed for a while.

The first major change is the mark and id association is done with array
instead of CMAP now. This gives us further performance gain: it could
be up to 70% now (please see the exact number below).

This change also make the code a bit more complex though, due to the
lock issue. RCU is used (not quite sure it's been used rightly though).
For now, RCU only protects the array base address update (due to
reallocate), it doesn't protect the array item (array[i] = xx]) change.
I think it's buggy and I need rethink about it.

The second major change is there is a thread introduced to do the real
flow offload. This is for diminishing the overhead by hw flow offload
installation/deletion at data path. See patch 9 for more detailed info.

In the last discussion, RSS action was suggested to use to get rid of
the QUEUE action workaround. Unfortunately, it didn't work. The flow
creation failed with MLX5 PMD driver, and that's the only driver in
DPDK that supports RSS action so far.

I also tested many flows this time. The result is more exciting: it
could be up to 267% boost, with 512 mega flows (with each has another
512 exact matching flows, thus it's 512*512=256K flows in total), one
core and one queue doing PHY-PHY forwarding. For the offload case, the
performance keeps the same as with one flow only: because the cost of
the mark to flow translation is constant, no matter how many flows
are inserted (as far as they are all offloaded). However, for the
vanilla ovs-dpdk, the more flows, the worse the performance is. In
another word, the more flows, the bigger difference we will see.

There were too many discussions in last version. I'm sorry if I missed
some comments and didn't do the corresponding changes in v3. Please let
me know if I made such mistakes.

And below are the formal cover letter introduction, for someone who
is the first time to see this patchset.

---
Hi,

Here is a joint work from Mellanox and Napatech, to enable the flow hw
offload with the DPDK generic flow interface (rte_flow).

The basic idea is to associate the flow with a mark id (a unit32_t number).
Later, we then get the flow directly from the mark id, bypassing the heavy
emc processing, including miniflow_extract.

The association is done with array in patch 1. It also reuses the flow
APIs introduced while adding the tc offloads. The emc bypassing is done
in patch 2. The flow offload is done in patch 4, which mainly does two
things:

- translate the ovs match to DPDK rte flow patterns
- bind those patterns with a MARK action.

Afterwards, the NIC will set the mark id in every pkt's mbuf when it
matches the flow. That's basically how we could get the flow directly
from the received mbuf.

While testing with PHY-PHY forwarding with one core, one queue and one
flow, I got about 70% performance boost. For PHY-vhost forwarding, I got
about 50% performance boost. It's basically the performance I got with v1,
when the tcp_flags is the ignored. In summary, the CMPA to array change
gives up yet another 16% performance boost.

The major issue mentioned in v1 is also workarounded: the queue index
is never set to 0 blindly anymore, but set to the rxq that first
receives the upcall pkt.

Note that it's disabled by default, which can be enabled by:

$ ovs-vsctl set Open_vSwitch . other_config:hw-offload=true

v3: - The mark and id association is done with array instead of CMAP.
- Added a thread to do hw offload operations
- Removed macros completely
- dropped the patch to set FDIR_CONF, which is a workround some
  Intel NICs.
- Added a debug patch to show all flow patterns we have created.
- Misc fixes

v2: - workaround the queue action issue
- fixed the tcp_flags being skipped issue, which also fixed the
  build warnings
- fixed l2 patterns for Intel nic
- Converted some macros to functions
- did not hardcode the max number of flow/action
- rebased on top of the latest code

Thanks.

--yliu

---
Finn Christensen (2):
  netdev-dpdk: implement flow put with rte flow
  netdev-dpdk: retry with queue action

Shachar Beiser (1):
  dpif-netdev: record rx queue id for the upcall

Yuanhan Liu (6):
  dpif-netdev: associate flow with a mark id
  dpif-netdev: retrieve flow directly from the flow mark
  netdev-dpdk: convert ufid to dpdk flow
  netdev-dpdk: remove offloaded flow on deletion
  netdev-dpdk: add debug for rte flow patterns
  dpif-netdev: do hw flow offload in another thread

 lib/dp-packet.h   |  14 +
 lib/dpif-netdev.c | 421 -
 lib/flow.c| 155 ---
 lib/flow.h|   1 +
 lib/netdev-dpdk.c | 776 +-
 lib/netdev.c  |   1 +
 lib/netdev.h  |   7 +
 7 files changed, 1331 insertions(+

Re: [ovs-dev] [PATCH net-next v9] openvswitch: enable NSH support

2017-09-25 Thread Yang, Yi
On Tue, Sep 26, 2017 at 03:28:42AM +0800, Eric Garver wrote:
> On Mon, Sep 25, 2017 at 10:16:09PM +0800, Yi Yang wrote:
> > +
> > +   length = nsh_hdr_len(nsh_hdr);
> > +   skb_pull(skb, length);
> 
> Do you need to verify you can actually pull length bytes? I don't see
> any guarantee.

I have added skb length check in pop_nsh, so that can verify this.

> > +   err = skb_ensure_writable(skb, skb_network_offset(skb) +
> > + sizeof(struct nshhdr));
> 
> This calls pskb_may_pull(), but you're not pulling any data here.

set_ipv4 and set_ipv6 also used skb_ensure_writable to check if skb
has enough header length, they didn't call skb_pull in the following
part, so I think this is ok.

I have sent out v10 to fix all of your comments for v9, please help
review v10, thanks a lot.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v9] openvswitch: enable NSH support

2017-09-25 Thread Yang, Yi
On Tue, Sep 26, 2017 at 02:14:39AM +0800, Jiri Benc wrote:
> On Mon, 25 Sep 2017 22:16:09 +0800, Yi Yang wrote:
> > +   return err;
> > +
> > +   key->eth.type = htons(ETH_P_NSH);
> 
> I wonder why you have this assignment here. The key is invalidated,
> thus nothing should rely on key->eth.type. However, looking at the code
> and ovs_fragment in particular, I'm not sure that's the case. Could you
> please explain why it is needed? And why the reverse of it is not
> needed in pop_nsh?

After push_nsh, the packet won't be recirculated to flow pipeline, so
key->eth.type must be set explicitly here, but for pop_nsh, the packet
will be recirculated to flow pipeline, it will be reparsed, so
key->eth.type will be set in packet parse function, we needn't handle it
in pop_nsh.

I have sent out v10 to fix all the comments for v9, please review v10,
thanks a lot.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v10] openvswitch: enable NSH support

2017-09-25 Thread Yi Yang
v9->v10
 - Change struct ovs_key_nsh to
   struct ovs_nsh_key_base base;
   __be32 context[NSH_MD1_CONTEXT_SIZE];
 - Fix new comments for v9

v8->v9
 - Fix build error reported by daily intel build
   because nsh module isn't selected by openvswitch

v7->v8
 - Rework nested value and mask for OVS_KEY_ATTR_NSH
 - Change pop_nsh to adapt to nsh kernel module
 - Fix many issues per comments from Jiri Benc

v6->v7
 - Remove NSH GSO patches in v6 because Jiri Benc
   reworked it as another patch series and they have
   been merged.
 - Change it to adapt to nsh kernel module added by NSH
   GSO patch series

v5->v6
 - Fix the rest comments for v4.
 - Add NSH GSO support for VxLAN-gpe + NSH and
   Eth + NSH.

v4->v5
 - Fix many comments by Jiri Benc and Eric Garver
   for v4.

v3->v4
 - Add new NSH match field ttl
 - Update NSH header to the latest format
   which will be final format and won't change
   per its author's confirmation.
 - Fix comments for v3.

v2->v3
 - Change OVS_KEY_ATTR_NSH to nested key to handle
   length-fixed attributes and length-variable
   attriubte more flexibly.
 - Remove struct ovs_action_push_nsh completely
 - Add code to handle nested attribute for SET_MASKED
 - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
   to transfer NSH header data.
 - Fix comments and coding style issues by Jiri and Eric

v1->v2
 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh
 - Dynamically allocate struct ovs_action_push_nsh for
   length-variable metadata.

OVS master and 2.8 branch has merged NSH userspace
patch series, this patch is to enable NSH support
in kernel data path in order that OVS can support
NSH in compat mode by porting this.

Signed-off-by: Yi Yang 
---
 include/net/nsh.h|   3 +
 include/uapi/linux/openvswitch.h |  29 
 net/nsh/nsh.c|  53 +++
 net/openvswitch/Kconfig  |   1 +
 net/openvswitch/actions.c| 116 ++
 net/openvswitch/flow.c   |  51 +++
 net/openvswitch/flow.h   |   7 +
 net/openvswitch/flow_netlink.c   | 317 ++-
 net/openvswitch/flow_netlink.h   |   5 +
 9 files changed, 581 insertions(+), 1 deletion(-)

diff --git a/include/net/nsh.h b/include/net/nsh.h
index a1eaea2..c03b089 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr 
*nsh, u8 flags,
NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
 }
 
+int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *nh);
+int skb_pop_nsh(struct sk_buff *skb);
+
 #endif /* __NET_NSH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 156ee4c..c1a785c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -333,6 +333,7 @@ enum ovs_key_attr {
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
+   OVS_KEY_ATTR_NSH,   /* Nested set of ovs_nsh_key_* */
 
 #ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
@@ -491,6 +492,30 @@ struct ovs_key_ct_tuple_ipv6 {
__u8   ipv6_proto;
 };
 
+enum ovs_nsh_key_attr {
+   OVS_NSH_KEY_ATTR_UNSPEC,
+   OVS_NSH_KEY_ATTR_BASE,  /* struct ovs_nsh_key_base. */
+   OVS_NSH_KEY_ATTR_MD1,   /* struct ovs_nsh_key_md1. */
+   OVS_NSH_KEY_ATTR_MD2,   /* variable-length octets for MD type 2. */
+   __OVS_NSH_KEY_ATTR_MAX
+};
+
+#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
+
+struct ovs_nsh_key_base {
+   __u8 flags;
+   __u8 ttl;
+   __u8 mdtype;
+   __u8 np;
+   __be32 path_hdr;
+};
+
+#define NSH_MD1_CONTEXT_SIZE 4
+
+struct ovs_nsh_key_md1 {
+   __be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
 /**
  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
@@ -806,6 +831,8 @@ struct ovs_action_push_eth {
  * packet.
  * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
  * packet.
+ * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
+ * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -835,6 +862,8 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_TRUNC,/* u32 struct ovs_action_trunc. */
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
OVS_ACTION_ATTR_POP_ETH,  /* No argument. */
+   OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */
+   OVS_ACTION_ATTR_POP_NSH,  /* No argument. */
 
__OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted

Re: [ovs-dev] [patch v3 2/5] conntrack: Add function ct_print_conn_info().

2017-09-25 Thread Darrell Ball
Note that this patch gave me a checkpatch error on this line
+if (RL_ON) {\

which I ignored since I did not see a valid reason for the complaint.

Darrell



On 9/25/17, 8:53 PM, "ovs-dev-boun...@openvswitch.org on behalf of Darrell 
Ball"  wrote:

A new debug function is added and used in a
subsequent patch.

Acked-by: Antonio Fischetti 
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 58 
+
 1 file changed, 58 insertions(+)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index cccba10..6da42bb 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -67,6 +67,9 @@ enum ct_alg_mode {
 CT_TFTP_MODE,
 };
 
+void ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
+bool force, bool rl_on);
+
 static bool conn_key_extract(struct conntrack *, struct dp_packet *,
  ovs_be16 dl_type, struct conn_lookup_ctx *,
  uint16_t zone);
@@ -223,6 +226,61 @@ conn_key_cmp(const struct conn_key *key1, const struct 
conn_key *key2)
 return 1;
 }
 
+void
+ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
+   bool force, bool rl_on)
+{
+#define CT_VLOG(RL_ON, LEVEL, ...) 
 \
+do {   
 \
+if (RL_ON) {   
 \
+static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 
5); \
+vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__);   
 \
+} else {   
 \
+vlog(&this_module, LEVEL, __VA_ARGS__);
 \
+}  
 \
+} while (0)
+
+if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) {
+if (c->key.dl_type == htons(ETH_TYPE_IP)) {
+CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev 
src "
+"ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports "
+"%"PRIu16"/%"PRIu16" rev src/dst ports "
+"%"PRIu16"/%"PRIu16" zone/rev zone "
+"%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
+"%"PRIu8"/%"PRIu8, log_msg,
+IP_ARGS(c->key.src.addr.ipv4_aligned),
+IP_ARGS(c->key.dst.addr.ipv4_aligned),
+IP_ARGS(c->rev_key.src.addr.ipv4_aligned),
+IP_ARGS(c->rev_key.dst.addr.ipv4_aligned),
+ntohs(c->key.src.port), ntohs(c->key.dst.port),
+ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
+c->key.zone, c->rev_key.zone, c->key.nw_proto,
+c->rev_key.nw_proto);
+} else {
+char ip6_s[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof 
ip6_s);
+char ip6_d[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof 
ip6_d);
+char ip6_rs[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs,
+  sizeof ip6_rs);
+char ip6_rd[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd,
+  sizeof ip6_rd);
+
+CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s"
+" rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16
+" rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone "
+"%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
+"%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs,
+ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port),
+ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
+c->key.zone, c->rev_key.zone, c->key.nw_proto,
+c->rev_key.nw_proto);
+}
+}
+}
+
 /* Initializes the connection tracker 'ct'.  The caller is responsible for
  * calling 'conntrack_destroy()', when the instance is not needed anymore 
*/
 void
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org

https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=FtGc7ekw4Jj5RMIiyX1uN9tCiUFXlXH

[ovs-dev] [patch v3 5/5] conntrack: Minor performance enhancement.

2017-09-25 Thread Darrell Ball
Add an OVS_UNLIKELY and reorder a few variable condition
checks.

Acked-by: Bhanuprakash Bodireddy 
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 3026772..33d2a21 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -1189,7 +1189,7 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
 
 bool tftp_ctl = is_tftp_ctl(pkt);
 struct conn conn_for_expectation;
-if (conn && (ftp_ctl || tftp_ctl)) {
+if (OVS_UNLIKELY((ftp_ctl || tftp_ctl) && conn)) {
 conn_for_expectation = *conn;
 }
 
@@ -1200,10 +1200,10 @@ process_one(struct conntrack *ct, struct dp_packet *pkt,
 }
 
 /* FTP control packet handling with expectation creation. */
-if (OVS_UNLIKELY(conn && ftp_ctl)) {
+if (OVS_UNLIKELY(ftp_ctl && conn)) {
 handle_ftp_ctl(ct, ctx, pkt, &conn_for_expectation,
now, CT_FTP_CTL_INTEREST, !!nat_action_info);
-} else if (OVS_UNLIKELY(conn && tftp_ctl)) {
+} else if (OVS_UNLIKELY(tftp_ctl && conn)) {
 handle_tftp_ctl(ct, &conn_for_expectation, now);
 }
 }
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch v3 4/5] conntrack: Fix clang static analysis reports.

2017-09-25 Thread Darrell Ball
These dead assignment warnings do not affect functionality.
In one case, a local variable could be removed and in another
case, the working pointer should be used rather than the start
pointer.

Fixes: bd5e81a0e596 ("Userspace Datapath: Add ALG infra and FTP.")
Reported-by: Bhanuprakash Bodireddy 
Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338515.html
Acked-by: Bhanuprakash Bodireddy 
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 12 
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 0ae5a10..3026772 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -2712,7 +2712,7 @@ process_ftp_ctl_v4(struct conntrack *ct,
 
 char *ftp = ftp_msg;
 enum ct_alg_mode mode;
-if (!strncasecmp(ftp_msg, FTP_PORT_CMD, strlen(FTP_PORT_CMD))) {
+if (!strncasecmp(ftp, FTP_PORT_CMD, strlen(FTP_PORT_CMD))) {
 ftp = ftp_msg + strlen(FTP_PORT_CMD);
 mode = CT_FTP_MODE_ACTIVE;
 } else {
@@ -2858,7 +2858,7 @@ process_ftp_ctl_v6(struct conntrack *ct,
 
 char *ftp = ftp_msg;
 struct in6_addr ip6_addr;
-if (!strncasecmp(ftp_msg, FTP_EPRT_CMD, strlen(FTP_EPRT_CMD))) {
+if (!strncasecmp(ftp, FTP_EPRT_CMD, strlen(FTP_EPRT_CMD))) {
 ftp = ftp_msg + strlen(FTP_EPRT_CMD);
 ftp = skip_non_digits(ftp);
 if (*ftp != FTP_AF_V6 || isdigit(ftp[1])) {
@@ -3001,10 +3001,8 @@ handle_ftp_ctl(struct conntrack *ct, const struct 
conn_lookup_ctx *ctx,
 
 struct ovs_16aligned_ip6_hdr *nh6 = dp_packet_l3(pkt);
 int64_t seq_skew = 0;
-bool seq_skew_dir;
 if (ftp_ctl == CT_FTP_CTL_OTHER) {
 seq_skew = conn_for_expectation->seq_skew;
-seq_skew_dir = conn_for_expectation->seq_skew_dir;
 } else if (ftp_ctl == CT_FTP_CTL_INTEREST) {
 enum ftp_ctl_pkt rc;
 if (ctx->key.dl_type == htons(ETH_TYPE_IPV6)) {
@@ -3028,18 +3026,16 @@ handle_ftp_ctl(struct conntrack *ct, const struct 
conn_lookup_ctx *ctx,
 seq_skew = repl_ftp_v6_addr(pkt, v6_addr_rep, ftp_data_start,
 addr_offset_from_ftp_data_start,
 addr_size, mode);
-seq_skew_dir = ctx->reply;
 if (seq_skew) {
 ip_len = ntohs(nh6->ip6_ctlun.ip6_un1.ip6_un1_plen);
 ip_len += seq_skew;
 nh6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(ip_len);
 conn_seq_skew_set(ct, &conn_for_expectation->key, now,
-  seq_skew, seq_skew_dir);
+  seq_skew, ctx->reply);
 }
 } else {
 seq_skew = repl_ftp_v4_addr(pkt, v4_addr_rep, ftp_data_start,
 addr_offset_from_ftp_data_start);
-seq_skew_dir = ctx->reply;
 ip_len = ntohs(l3_hdr->ip_tot_len);
 if (seq_skew) {
 ip_len += seq_skew;
@@ -3047,7 +3043,7 @@ handle_ftp_ctl(struct conntrack *ct, const struct 
conn_lookup_ctx *ctx,
   l3_hdr->ip_tot_len, htons(ip_len));
 l3_hdr->ip_tot_len = htons(ip_len);
 conn_seq_skew_set(ct, &conn_for_expectation->key, now,
-  seq_skew, seq_skew_dir);
+  seq_skew, ctx->reply);
 }
 }
 } else {
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch v3 3/5] conntrack: Tighten handling of alg reverse conns.

2017-09-25 Thread Darrell Ball
Close a theoretical race delete/create corner case for alg
reverse conns and add debugging around this that may point to
an intentional exploit, unintentional problem or just a rare
condition. The solution is to keep track of reverse conn via
nat_conn_keys and avoid deleting the reverse conn when it has been
recreated.

Fixes: bd5e81a0e596 ("Userspace Datapath: Add ALG infra and FTP.")
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 34 --
 1 file changed, 28 insertions(+), 6 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 6da42bb..0ae5a10 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -67,9 +67,6 @@ enum ct_alg_mode {
 CT_TFTP_MODE,
 };
 
-void ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
-bool force, bool rl_on);
-
 static bool conn_key_extract(struct conntrack *, struct dp_packet *,
  ovs_be16 dl_type, struct conn_lookup_ctx *,
  uint16_t zone);
@@ -226,7 +223,7 @@ conn_key_cmp(const struct conn_key *key1, const struct 
conn_key *key2)
 return 1;
 }
 
-void
+static void
 ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
bool force, bool rl_on)
 {
@@ -835,6 +832,18 @@ conn_not_found(struct conntrack *ct, struct dp_packet *pkt,
 nc->nat_info->nat_action = NAT_ACTION_DST;
 }
 *conn_for_un_nat_copy = *nc;
+ct_rwlock_wrlock(&ct->resources_lock);
+bool new_insert = nat_conn_keys_insert(&ct->nat_conn_keys,
+   conn_for_un_nat_copy,
+   ct->hash_basis);
+ct_rwlock_unlock(&ct->resources_lock);
+if (!new_insert) {
+char *log_msg = xasprintf("Pre-existing alg "
+  "nat_conn_key");
+ct_print_conn_info(conn_for_un_nat_copy, log_msg, VLL_INFO,
+   true, false);
+free(log_msg);
+}
 } else {
 *conn_for_un_nat_copy = *nc;
 ct_rwlock_wrlock(&ct->resources_lock);
@@ -936,8 +945,16 @@ create_un_nat_conn(struct conntrack *ct, struct conn 
*conn_for_un_nat_copy,
 struct conn *rev_conn = conn_lookup(ct, &nc->key, now);
 
 if (alg_un_nat) {
-hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
-&nc->node, un_nat_hash);
+if (!rev_conn) {
+hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
+&nc->node, un_nat_hash);
+} else {
+char *log_msg = xasprintf("Unusual condition for un_nat conn "
+  "create for alg: rev_conn %p", rev_conn);
+ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+free(log_msg);
+free(nc);
+}
 } else {
 ct_rwlock_rdlock(&ct->resources_lock);
 
@@ -949,6 +966,11 @@ create_un_nat_conn(struct conntrack *ct, struct conn 
*conn_for_un_nat_copy,
 hmap_insert(&ct->buckets[un_nat_conn_bucket].connections,
 &nc->node, un_nat_hash);
 } else {
+char *log_msg = xasprintf("Unusual condition for un_nat conn "
+  "create: nat_conn_key_node/rev_conn "
+  "%p/%p", nat_conn_key_node, rev_conn);
+ct_print_conn_info(nc, log_msg, VLL_INFO, true, false);
+free(log_msg);
 free(nc);
 }
 ct_rwlock_unlock(&ct->resources_lock);
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch v3 2/5] conntrack: Add function ct_print_conn_info().

2017-09-25 Thread Darrell Ball
A new debug function is added and used in a
subsequent patch.

Acked-by: Antonio Fischetti 
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 58 +
 1 file changed, 58 insertions(+)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index cccba10..6da42bb 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -67,6 +67,9 @@ enum ct_alg_mode {
 CT_TFTP_MODE,
 };
 
+void ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
+bool force, bool rl_on);
+
 static bool conn_key_extract(struct conntrack *, struct dp_packet *,
  ovs_be16 dl_type, struct conn_lookup_ctx *,
  uint16_t zone);
@@ -223,6 +226,61 @@ conn_key_cmp(const struct conn_key *key1, const struct 
conn_key *key2)
 return 1;
 }
 
+void
+ct_print_conn_info(struct conn *c, char *log_msg, enum vlog_level vll,
+   bool force, bool rl_on)
+{
+#define CT_VLOG(RL_ON, LEVEL, ...)  \
+do {\
+if (RL_ON) {\
+static struct vlog_rate_limit rl_ = VLOG_RATE_LIMIT_INIT(5, 5); \
+vlog_rate_limit(&this_module, LEVEL, &rl_, __VA_ARGS__);\
+} else {\
+vlog(&this_module, LEVEL, __VA_ARGS__); \
+}   \
+} while (0)
+
+if (OVS_UNLIKELY(force || vlog_is_enabled(&this_module, vll))) {
+if (c->key.dl_type == htons(ETH_TYPE_IP)) {
+CT_VLOG(rl_on, vll, "%s: src ip "IP_FMT" dst ip "IP_FMT" rev src "
+"ip "IP_FMT" rev dst ip "IP_FMT" src/dst ports "
+"%"PRIu16"/%"PRIu16" rev src/dst ports "
+"%"PRIu16"/%"PRIu16" zone/rev zone "
+"%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
+"%"PRIu8"/%"PRIu8, log_msg,
+IP_ARGS(c->key.src.addr.ipv4_aligned),
+IP_ARGS(c->key.dst.addr.ipv4_aligned),
+IP_ARGS(c->rev_key.src.addr.ipv4_aligned),
+IP_ARGS(c->rev_key.dst.addr.ipv4_aligned),
+ntohs(c->key.src.port), ntohs(c->key.dst.port),
+ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
+c->key.zone, c->rev_key.zone, c->key.nw_proto,
+c->rev_key.nw_proto);
+} else {
+char ip6_s[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->key.src.addr.ipv6, ip6_s, sizeof ip6_s);
+char ip6_d[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->key.dst.addr.ipv6, ip6_d, sizeof ip6_d);
+char ip6_rs[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->rev_key.src.addr.ipv6, ip6_rs,
+  sizeof ip6_rs);
+char ip6_rd[INET6_ADDRSTRLEN];
+inet_ntop(AF_INET6, &c->rev_key.dst.addr.ipv6, ip6_rd,
+  sizeof ip6_rd);
+
+CT_VLOG(rl_on, vll, "%s: src ip %s dst ip %s rev src ip %s"
+" rev dst ip %s src/dst ports %"PRIu16"/%"PRIu16
+" rev src/dst ports %"PRIu16"/%"PRIu16" zone/rev zone "
+"%"PRIu16"/%"PRIu16" nw_proto/rev nw_proto "
+"%"PRIu8"/%"PRIu8, log_msg, ip6_s, ip6_d, ip6_rs,
+ip6_rd, ntohs(c->key.src.port), ntohs(c->key.dst.port),
+ntohs(c->rev_key.src.port), ntohs(c->rev_key.dst.port),
+c->key.zone, c->rev_key.zone, c->key.nw_proto,
+c->rev_key.nw_proto);
+}
+}
+}
+
 /* Initializes the connection tracker 'ct'.  The caller is responsible for
  * calling 'conntrack_destroy()', when the instance is not needed anymore */
 void
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [patch v3 1/5] conntrack: Create nat_conn_keys_insert().

2017-09-25 Thread Darrell Ball
Create a separate function from existing code, so the
code can be reused in a subsequent patch; no change
in functionality.

Acked-by: Bhanuprakash Bodireddy 
Signed-off-by: Darrell Ball 
---
 lib/conntrack.c | 41 -
 1 file changed, 28 insertions(+), 13 deletions(-)

diff --git a/lib/conntrack.c b/lib/conntrack.c
index 419cb1d..cccba10 100644
--- a/lib/conntrack.c
+++ b/lib/conntrack.c
@@ -96,6 +96,11 @@ nat_conn_keys_lookup(struct hmap *nat_conn_keys,
  const struct conn_key *key,
  uint32_t basis);
 
+static bool
+nat_conn_keys_insert(struct hmap *nat_conn_keys,
+ const struct conn *nat_conn,
+ uint32_t hash_basis);
+
 static void
 nat_conn_keys_remove(struct hmap *nat_conn_keys,
  const struct conn_key *key,
@@ -2065,19 +2070,9 @@ nat_select_range_tuple(struct conntrack *ct, const 
struct conn *conn,
 nat_conn->rev_key.src.port = htons(port);
 }
 
-struct nat_conn_key_node *nat_conn_key_node =
-nat_conn_keys_lookup(&ct->nat_conn_keys, &nat_conn->rev_key,
- ct->hash_basis);
-
-if (!nat_conn_key_node) {
-struct nat_conn_key_node *nat_conn_key =
-xzalloc(sizeof *nat_conn_key);
-nat_conn_key->key = nat_conn->rev_key;
-nat_conn_key->value = nat_conn->key;
-uint32_t nat_conn_key_hash = conn_key_hash(&nat_conn_key->key,
-   ct->hash_basis);
-hmap_insert(&ct->nat_conn_keys, &nat_conn_key->node,
-nat_conn_key_hash);
+bool new_insert = nat_conn_keys_insert(&ct->nat_conn_keys, nat_conn,
+   ct->hash_basis);
+if (new_insert) {
 return true;
 } else if (!all_ports_tried) {
 if (min_port == max_port) {
@@ -2137,6 +2132,26 @@ nat_conn_keys_lookup(struct hmap *nat_conn_keys,
 return NULL;
 }
 
+/* This function must be called with the ct->resources lock taken. */
+static bool
+nat_conn_keys_insert(struct hmap *nat_conn_keys, const struct conn *nat_conn,
+ uint32_t basis)
+{
+struct nat_conn_key_node *nat_conn_key_node =
+nat_conn_keys_lookup(nat_conn_keys, &nat_conn->rev_key, basis);
+
+if (!nat_conn_key_node) {
+struct nat_conn_key_node *nat_conn_key = xzalloc(sizeof *nat_conn_key);
+nat_conn_key->key = nat_conn->rev_key;
+nat_conn_key->value = nat_conn->key;
+uint32_t nat_conn_key_hash = conn_key_hash(&nat_conn_key->key,
+   basis);
+hmap_insert(nat_conn_keys, &nat_conn_key->node, nat_conn_key_hash);
+return true;
+}
+return false;
+}
+
 /* This function must be called with the ct->resources write lock taken. */
 static void
 nat_conn_keys_remove(struct hmap *nat_conn_keys,
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [branch 2.8 2/2] netdev-dpdk: reset packet_type for reused dp_packets.

2017-09-25 Thread Darrell Ball
From: Zoltan Balogh 

DPDK uses dp-packet pool for storing received packets. The pool is
reused by rxq_recv funcions of the DPDK netdevs. The datapath is
capable to modify the packet_type property of packets. For instance
when encapsulated L3 packets are received on a ptap gre port.
In this case the packet_type property of struct dp_packet can be
modified and later the same dp_packet with the modified packet_type
can be reused in the rxq_rec function, so it can contain corrupted
data.

The dp_packet_batch_init_cutlen() in the rxq_recv functions iterates
over dp_packets and sets their cutlen. So I modified this function
to set packet_type to Ethernet for the dp_packets as well. I also
renamed this function because of the added functionality.

The dp_packet_batch_init_cutlen() iterates over batch->count dp_packet.
Therefore setting of batch->count = nb_rx needs to be done before the
former function is invoked. This is an additional fix.

Signed-off-by: Zoltan Balogh 
Signed-off-by: Laszlo Suru 
Co-authored-by: Laszlo Suru 
CC: Jan Scheurich 
CC: Sugesh Chandran 
CC: Darrell Ball 
Signed-off-by: Darrell Ball 
---
 lib/dp-packet.c   | 1 -
 lib/dp-packet.h   | 3 ++-
 lib/netdev-dpdk.c | 7 ---
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index 312f63a..945b81a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -103,7 +103,6 @@ dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
 {
 dp_packet_set_allocated(b, allocated);
 b->source = DPBUF_DPDK;
-b->packet_type = htonl(PT_ETH);
 }
 
 /* Initializes 'b' as an empty dp_packet with an initial capacity of 'size'
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 75ec305..980ef78 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -801,12 +801,13 @@ dp_packet_delete_batch(struct dp_packet_batch *batch, 
bool may_steal)
 }
 
 static inline void
-dp_packet_batch_init_cutlen(struct dp_packet_batch *batch)
+dp_packet_batch_init_packet_fields(struct dp_packet_batch *batch)
 {
 struct dp_packet *packet;
 
 DP_PACKET_BATCH_FOR_EACH (packet, batch) {
 dp_packet_reset_cutlen(packet);
+packet->packet_type = htonl(PT_ETH);
 }
 }
 
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index f58e9be..b9a 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1644,8 +1644,9 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
  nb_rx, dropped);
 rte_spinlock_unlock(&dev->stats_lock);
 
-dp_packet_batch_init_cutlen(batch);
-batch->count = (int) nb_rx;
+batch->count = nb_rx;
+dp_packet_batch_init_packet_fields(batch);
+
 return 0;
 }
 
@@ -1684,8 +1685,8 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct 
dp_packet_batch *batch)
 rte_spinlock_unlock(&dev->stats_lock);
 }
 
-dp_packet_batch_init_cutlen(batch);
 batch->count = nb_rx;
+dp_packet_batch_init_packet_fields(batch);
 
 return 0;
 }
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [branch 2.8 1/2] dp-packet: Refactor DPDK packet initialization.

2017-09-25 Thread Darrell Ball
DPDK uses dp-packet pools and manages the mbuf portion of
each packet. When a pool is created, partial initialization is
also done on the OVS portion (i.e. non-mbuf).  Since packet
memory is reused, this is not very useful for transient
fields and is also misleading.  Furthermore, some of these
transient fields are properly initialized for DPDK packets
entering OVS anyways, which is the only reasonable way to do this.
Another field, cutlen, is initialized in this manner in the pool
and intended to be reset when cutlen is applied on sending the
packet out. However, if cutlen context is set but the packet is
not sent out for some reason, then the packet header would be
corrupted in the memory pool.  It is better to just reset the
cutlen in the packets when received.  I did not detect a
degradation in performance, however, I would be willing to
have some degradation, since this is a proper way to handle
this.  In addition to initializing cutlen in received packets,
the other OVS transient fields are removed from the DPDK pool
initialization.

Acked-by: Sugesh Chandran 
Signed-off-by: Darrell Ball 
---
 lib/dp-packet.c   | 16 +---
 lib/dp-packet.h   | 10 ++
 lib/netdev-dpdk.c |  2 ++
 3 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/lib/dp-packet.c b/lib/dp-packet.c
index c1f43f3..312f63a 100644
--- a/lib/dp-packet.c
+++ b/lib/dp-packet.c
@@ -92,16 +92,18 @@ dp_packet_use_const(struct dp_packet *b, const void *data, 
size_t size)
 dp_packet_set_size(b, size);
 }
 
-/* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes of
- * memory starting at 'base'.  DPDK allocated dp_packet and *data is allocated
- * from one continous memory region, so in memory data start right after
- * dp_packet.  Therefore there is special method to free this type of
- * buffer.  dp_packet base, data and size are initialized by dpdk rcv() so no
- * need to initialize those fields. */
+/* Initializes 'b' as an empty dp_packet that contains the 'allocated' bytes.
+ * DPDK allocated dp_packet and *data is allocated from one continous memory
+ * region as part of memory pool, so in memory data start right after
+ * dp_packet.  Therefore, there is a special method to free this type of
+ * buffer.  Here, non-transient ovs dp-packet fields are initialized for
+ * packets that are part of a DPDK memory pool. */
 void
 dp_packet_init_dpdk(struct dp_packet *b, size_t allocated)
 {
-dp_packet_init__(b, allocated, DPBUF_DPDK);
+dp_packet_set_allocated(b, allocated);
+b->source = DPBUF_DPDK;
+b->packet_type = htonl(PT_ETH);
 }
 
 /* Initializes 'b' as an empty dp_packet with an initial capacity of 'size'
diff --git a/lib/dp-packet.h b/lib/dp-packet.h
index 8b3f0ff..75ec305 100644
--- a/lib/dp-packet.h
+++ b/lib/dp-packet.h
@@ -801,6 +801,16 @@ dp_packet_delete_batch(struct dp_packet_batch *batch, bool 
may_steal)
 }
 
 static inline void
+dp_packet_batch_init_cutlen(struct dp_packet_batch *batch)
+{
+struct dp_packet *packet;
+
+DP_PACKET_BATCH_FOR_EACH (packet, batch) {
+dp_packet_reset_cutlen(packet);
+}
+}
+
+static inline void
 dp_packet_batch_apply_cutlen(struct dp_packet_batch *batch)
 {
 if (batch->trunc) {
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 62dbb7e..f58e9be 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1644,6 +1644,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
  nb_rx, dropped);
 rte_spinlock_unlock(&dev->stats_lock);
 
+dp_packet_batch_init_cutlen(batch);
 batch->count = (int) nb_rx;
 return 0;
 }
@@ -1683,6 +1684,7 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, struct 
dp_packet_batch *batch)
 rte_spinlock_unlock(&dev->stats_lock);
 }
 
+dp_packet_batch_init_cutlen(batch);
 batch->count = nb_rx;
 
 return 0;
-- 
1.9.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Darrell Ball


On 9/25/17, 12:41 PM, "Ben Pfaff"  wrote:

On Mon, Sep 25, 2017 at 06:37:29PM +, Darrell Ball wrote:
> 
> 
> On 9/25/17, 11:31 AM, "Ben Pfaff"  wrote:
> 
> On Mon, Sep 25, 2017 at 06:28:46PM +, Darrell Ball wrote:
> > 
> > 
> > On 9/25/17, 10:49 AM, "ovs-dev-boun...@openvswitch.org on behalf of 
Justin Pettit"  
wrote:
> > 
> > 
> > > On Sep 25, 2017, at 5:54 AM, Russell Bryant  
wrote:
> > > 
> > > There's some important changes in branch-2.7.  Can we do a 
2.7.3
> > > release soon?  Does anyone have anything they need addressed 
first?
> > 
> > I'm happy to do a release.  I think we're due for a 2.8.1 
release, too.
> > 
> > Ian wants a DPDK patch moved into branch-2.8.  Is there 
anything else that's important?
> > 
> > 
> > 2.8.0 was Aug 31.
> > Maybe 2.8.1 can wait a couple weeks to give time for other 
potential fixes?
> 
> There will always be more fixes, and we can always do more releases.
> 
> Do you know of important fixes that are in the works?
> 
> There are a couple backport requests in the dpdk merge.

OK.

I merged that request, and backported one of them, but "netdev-dpdk:
reset packet_type for reused dp_packets." had a conflict.  Can you send
out a backport?

> Also, ideally, I would like to include
> 
https://urldefense.proofpoint.com/v2/url?u=https-3A__patchwork.ozlabs.org_patch_816712_&d=DwIBAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dUNQc7lLVYHsKc9aahwyrvZMr8T9MBwGKdlBzWvttMM&s=EyJ0ntHzrXaLo0BMDREZnHkpMhNLixvNi-HdUDBoWzQ&e=
 
> as well.

Just patch 5 from that series?  Usually, I expect important bug fixes to
be early in a series.

Main aspect is in patch 5, but I would want to include patches 3-5.
I have a new version of the series that I need to send out, which I’ll do this 
afternoon.






___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 06:37:29PM +, Darrell Ball wrote:
> 
> 
> On 9/25/17, 11:31 AM, "Ben Pfaff"  wrote:
> 
> On Mon, Sep 25, 2017 at 06:28:46PM +, Darrell Ball wrote:
> > 
> > 
> > On 9/25/17, 10:49 AM, "ovs-dev-boun...@openvswitch.org on behalf of 
> Justin Pettit"  
> wrote:
> > 
> > 
> > > On Sep 25, 2017, at 5:54 AM, Russell Bryant  
> wrote:
> > > 
> > > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > > release soon?  Does anyone have anything they need addressed 
> first?
> > 
> > I'm happy to do a release.  I think we're due for a 2.8.1 release, 
> too.
> > 
> > Ian wants a DPDK patch moved into branch-2.8.  Is there anything 
> else that's important?
> > 
> > 
> > 2.8.0 was Aug 31.
> > Maybe 2.8.1 can wait a couple weeks to give time for other potential 
> fixes?
> 
> There will always be more fixes, and we can always do more releases.
> 
> Do you know of important fixes that are in the works?
> 
> There are a couple backport requests in the dpdk merge.

OK.

I merged that request, and backported one of them, but "netdev-dpdk:
reset packet_type for reused dp_packets." had a conflict.  Can you send
out a backport?

> Also, ideally, I would like to include
> https://patchwork.ozlabs.org/patch/816712/
> as well.

Just patch 5 from that series?  Usually, I expect important bug fixes to
be early in a series.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] dpctl: init CT entry variable.

2017-09-25 Thread Darrell Ball


On 9/25/17, 2:51 AM, "Fischetti, Antonio"  wrote:

> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Friday, September 22, 2017 9:26 AM
> To: Fischetti, Antonio ; d...@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH 2/2] dpctl: init CT entry variable.
> 
> 
> 
> On 9/13/17, 5:37 AM, "ovs-dev-boun...@openvswitch.org on behalf of
> antonio.fische...@intel.com"  antonio.fische...@intel.com> wrote:
> 
> ct_dpif_entry_uninit could potentially be called even if
> ct_dpif_dump_next failed. As ct_dpif_entry_uninit receives
> a pointer to a CT entry - and just checks it is not null -
> it's safer to init to zero any instantiated ct_dpif_entry
> variable before its usage.
> 
> [Darrell] I took a look and did not see a particular problem.
>Was there an issue that we are trying to address?; if so, 
this
> may hide it ?

[Antonio]
This change is more a matter of keeping safe habits for future
code additions. 
In a new CT function that could be added down the line, one could
potentially call ct_dpif_entry_uninit without checking what was
returned by ct_dpif_dump_next.

As this is not in the hotpath, I added a memset to be extra-careful 
when initializing the local CT entry variable.

Maybe also a comment on top of the fn definition could help on this,
something like?

+/* This function must be called when the returned
+   value from ct_dpif_dump_next is 0. */
void
ct_dpif_entry_uninit(struct ct_dpif_entry *entry)
{
if (entry) {
if (entry->helper.name) {


[Darrell] It is usually better to wait for the new code that might need this and
   associate those patches as part of the same series.
   Can we do that ?



> 
> 
> Signed-off-by: Antonio Fischetti 
> ---
>  lib/dpctl.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 86d0f90..77d4e58 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -1287,6 +1287,7 @@ dpctl_dump_conntrack(int argc, const char 
*argv[],
>  return error;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  struct ds s = DS_EMPTY_INITIALIZER;
> 
> @@ -1392,6 +1393,7 @@ dpctl_ct_stats_show(int argc, const char 
*argv[],
>  return error;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  int tot_conn = 0;
>  while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  ct_dpif_entry_uninit(&cte);
> @@ -1532,6 +1534,7 @@ dpctl_ct_bkts(int argc, const char *argv[],
>   return 0;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  dpctl_print(dpctl_p, "Total Buckets: %d\n", tot_bkts);
> 
>  int tot_conn = 0;
> --
> 2.4.11
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_mailman_listinfo_ovs-
> 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
> 
uZnsw&m=3FF1c4sa7rHZb5a1DAZQlnsPZywcY7R_LNFki9WS9So&s=tU4fSt243XI_2QHkAF4R2h0sm
> vtTC8fDyiOXBI02_t8&e=
> 



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] dpctl: manage ret value when dumping CT entries.

2017-09-25 Thread Darrell Ball


On 9/25/17, 2:27 AM, "Fischetti, Antonio"  wrote:

Hi Darrell, 
I agree with your suggestion in keeping 'error'
as the only variable to manage return values.

In this case - as I'm assuming we shouldn't return an EOF to the 
caller - I should manage error as below?

if (error == EOF) {
error = 0;  << EOF is not an issue, so return 0 to the caller
} else if (error) {  
dpctl_error(dpctl_p, error, "dumping conntrack entry");
ct_dpif_dump_done(dump);
dpif_close(dpif);
return error;
} 


[Darrell] For sure - EOF should not be returned to user since it is not an 
error.
   The other point I wanted to make is:
I think you can trivially fall thru. for dpctl_dump_conntrack()
 After doing just dpctl_error(dpctl_p, error, "dumping 
conntrack entry");
   (comments inline).
   And in the other 2 cases, I think you can still try to print out 
whatever is known
   after breaking out of the while loop and use the existing 
cleanup code.
   You would print error in the cases as well
What do you think ?




> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Friday, September 22, 2017 8:42 AM
> To: Fischetti, Antonio ; d...@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH 1/2] dpctl: manage ret value when dumping CT
> entries.
> 
> Few comments Antonio
> 
> Darrell
> 
> On 9/13/17, 5:37 AM, "ovs-dev-boun...@openvswitch.org on behalf of
> antonio.fische...@intel.com"  antonio.fische...@intel.com> wrote:
> 
> Manage error value returned by ct_dpif_dump_next.
> 
> Signed-off-by: Antonio Fischetti 
> ---
>  lib/dpctl.c | 28 +---
>  1 file changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 8951d6e..86d0f90 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -1263,6 +1263,7 @@ dpctl_dump_conntrack(int argc, const char 
*argv[],
>  struct dpif *dpif;
>  char *name;
>  int error;
> +int ret;
> 
>  if (argc > 1 && ovs_scan(argv[argc - 1], "zone=%"SCNu16, &zone)) 
{
>  pzone = &zone;
> @@ -1286,7 +1287,7 @@ dpctl_dump_conntrack(int argc, const char 
*argv[],
>  return error;
>  }
> 
> -while (!ct_dpif_dump_next(dump, &cte)) {
> +while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  struct ds s = DS_EMPTY_INITIALIZER;
> 
>  ct_dpif_format_entry(&cte, &s, dpctl_p->verbosity,
> @@ -1296,6 +1297,13 @@ dpctl_dump_conntrack(int argc, const char 
*argv[],
>  dpctl_print(dpctl_p, "%s\n", ds_cstr(&s));
>  ds_destroy(&s);
>  }
> +if (ret && ret != EOF) {
> +dpctl_error(dpctl_p, ret, "dumping conntrack entry");
> +ct_dpif_dump_done(dump);
> +dpif_close(dpif);
> +return ret;
> +}
> +
> 
> [Darrell] Maybe we can reuse ‘error’ ?
> if (error && error != EOF) {
>and just do
>dpctl_error(dpctl_p, error, "dumping conntrack entry");
>  and then fall thru for cleanup ?
> 
> 
>  ct_dpif_dump_done(dump);
>  dpif_close(dpif);
>  return error;
> @@ -1348,6 +1356,7 @@ dpctl_ct_stats_show(int argc, const char 
*argv[],
>  int proto_stats[CT_STATS_MAX];
>  int tcp_conn_per_states[CT_DPIF_TCPS_MAX_NUM];
>  int error;
> +int ret;
> 
>  while (argc > 1 && lastargc != argc) {
>  lastargc = argc;
> @@ -1384,7 +1393,7 @@ dpctl_ct_stats_show(int argc, const char 
*argv[],
>  }
> 
>  int tot_conn = 0;
> -while (!ct_dpif_dump_next(dump, &cte)) {
> +while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  ct_dpif_entry_uninit(&cte);
>  tot_conn++;
>  switch (cte.tuple_orig.ip_proto) {
> @@ -1425,6 +1434,12 @@ dpctl_ct_stats_show(int argc, const char 
*argv[],
>  break;
>  }
>  }
> +if (ret && ret != EOF) {
> +dpctl_error(dpctl_p, ret, "dumping conntrack entry");
> +ct_dpif_dump_done(dump);
> +dpif_close(dpif);
> +return ret;
> +}
> 
> [Darrell]
>  Can we reuse ‘error’, just print error and fall thru. ?
> It looks like it is safe to print whatever we know, which could be useful.
> Otherwise, if we have

Re: [ovs-dev] [PATCH net-next v9] openvswitch: enable NSH support

2017-09-25 Thread Eric Garver
On Mon, Sep 25, 2017 at 10:16:09PM +0800, Yi Yang wrote:
> v8->v9
>  - Fix build error reported by daily intel build
>because nsh module isn't selected by openvswitch
> 
> v7->v8
>  - Rework nested value and mask for OVS_KEY_ATTR_NSH
>  - Change pop_nsh to adapt to nsh kernel module
>  - Fix many issues per comments from Jiri Benc
> 
> v6->v7
>  - Remove NSH GSO patches in v6 because Jiri Benc
>reworked it as another patch series and they have
>been merged.
>  - Change it to adapt to nsh kernel module added by NSH
>GSO patch series
> 
> v5->v6
>  - Fix the rest comments for v4.
>  - Add NSH GSO support for VxLAN-gpe + NSH and
>Eth + NSH.
> 
> v4->v5
>  - Fix many comments by Jiri Benc and Eric Garver
>for v4.
> 
> v3->v4
>  - Add new NSH match field ttl
>  - Update NSH header to the latest format
>which will be final format and won't change
>per its author's confirmation.
>  - Fix comments for v3.
> 
> v2->v3
>  - Change OVS_KEY_ATTR_NSH to nested key to handle
>length-fixed attributes and length-variable
>attriubte more flexibly.
>  - Remove struct ovs_action_push_nsh completely
>  - Add code to handle nested attribute for SET_MASKED
>  - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
>to transfer NSH header data.
>  - Fix comments and coding style issues by Jiri and Eric
> 
> v1->v2
>  - Change encap_nsh and decap_nsh to push_nsh and pop_nsh
>  - Dynamically allocate struct ovs_action_push_nsh for
>length-variable metadata.
> 
> OVS master and 2.8 branch has merged NSH userspace
> patch series, this patch is to enable NSH support
> in kernel data path in order that OVS can support
> NSH in compat mode by porting this.
> 
> Signed-off-by: Yi Yang 
> ---
>  include/net/nsh.h|   3 +
>  include/uapi/linux/openvswitch.h |  29 
>  net/nsh/nsh.c|  53 +++
>  net/openvswitch/Kconfig  |   1 +
>  net/openvswitch/actions.c| 112 ++
>  net/openvswitch/flow.c   |  51 ++
>  net/openvswitch/flow.h   |  11 ++
>  net/openvswitch/flow_netlink.c   | 324 
> ++-
>  net/openvswitch/flow_netlink.h   |   5 +
>  9 files changed, 588 insertions(+), 1 deletion(-)
> 
[...]
> diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
> index 58fb827..54334ca 100644
> --- a/net/nsh/nsh.c
> +++ b/net/nsh/nsh.c
> @@ -14,6 +14,59 @@
>  #include 
>  #include 
>  
> +int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *src_nsh_hdr)
> +{
> + struct nshhdr *nsh_hdr;
> + size_t length = nsh_hdr_len(src_nsh_hdr);
> + u8 next_proto;
> +
> + if (skb->mac_len) {
> + next_proto = TUN_P_ETHERNET;
> + } else {
> + next_proto = tun_p_from_eth_p(skb->protocol);
> + if (!next_proto)
> + return -EAFNOSUPPORT;
> + }
> +
> + /* Add the NSH header */
> + if (skb_cow_head(skb, length) < 0)
> + return -ENOMEM;
> +
> + skb_push(skb, length);
> + nsh_hdr = (struct nshhdr *)(skb->data);
> + memcpy(nsh_hdr, src_nsh_hdr, length);
> + nsh_hdr->np = next_proto;
> +
> + skb->protocol = htons(ETH_P_NSH);
> + skb_reset_mac_header(skb);
> + skb_reset_mac_len(skb);
> + skb_reset_network_header(skb);

I think you mean to reset network_header before mac_len.

> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(skb_push_nsh);
> +
> +int skb_pop_nsh(struct sk_buff *skb)
> +{
> + struct nshhdr *nsh_hdr = (struct nshhdr *)(skb->data);
> + size_t length;
> + u16 inner_proto;
> +
> + inner_proto = tun_p_to_eth_p(nsh_hdr->np);
> + if (!inner_proto)
> + return -EAFNOSUPPORT;
> +
> + length = nsh_hdr_len(nsh_hdr);
> + skb_pull(skb, length);

Do you need to verify you can actually pull length bytes? I don't see
any guarantee.

> + skb_reset_mac_header(skb);
> + skb_reset_mac_len(skb);
> + skb_reset_network_header(skb);

Reset network before mac_len.

> + skb->protocol = inner_proto;
> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(skb_pop_nsh);
> +
>  static struct sk_buff *nsh_gso_segment(struct sk_buff *skb,
>  netdev_features_t features)
>  {
[...]
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index a54a556..d026b85 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
[...]
> @@ -602,6 +642,59 @@ static int set_ipv6(struct sk_buff *skb, struct 
> sw_flow_key *flow_key,
>   return 0;
>  }
>  
> +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
> +const struct nlattr *a)
> +{
> + struct nshhdr *nsh_hdr;
> + int err;
> + u8 flags;
> + u8 ttl;
> + int i;
> +
> + struct ovs_key_nsh key;
> + struct ovs_key_nsh mask;
> +
> + err = nsh_key_from_nlattr(a, &key, &mask);
> + if (err)
> + return err;
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> + 

Re: [ovs-dev] [PATCH] rhel: fix log directory permissions

2017-09-25 Thread Russell Bryant
On Mon, Sep 25, 2017 at 2:42 PM, Aaron Conole  wrote:

> Flavio Leitner  writes:
>
> > On Fri, 22 Sep 2017 09:44:18 -0400
> > Aaron Conole  wrote:
> >
> >> When the logrotate script runs, and Open vSwitch is running as a
> non-root
> >> user, the /var/log/openvswitch directory doesn't have other rx bits set.
> >> This means the reopen attempt will fail with "permission denied", even
> though
> >> the default logrotate configuration creates a new log file with the
> >> appropriate attributes.
> >>
> >> This change sets the r/x bits for other on /var/log/messages
> >
> > /var/log/openvswitch? :-)
>
> D'oh!  Let's blame it on the problem between the keyboard and chair.
>
> Russell - since you're likely the committer for this, do you want a v2
> with a fixed message, or would you be able to fix it during apply?
>

​If it's just the commit message, don't worry about v2.​



>
> > Reproduced here
> > # ovs-appctl -t ovs-vswitchd vlog/reopen
> > Permission denied
> > ovs-appctl: ovs-vswitchd: server returned an error
> >
> > Acked-by: Flavio Leitner 
> >
> >
> >>
> >> Signed-off-by: Aaron Conole 
> >> Tested-by: Jean Hsiao 
> >> ---
> >>  rhel/openvswitch-fedora.spec.in | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.
> spec.in
> >> index dd79fa9..8d62393 100644
> >> --- a/rhel/openvswitch-fedora.spec.in
> >> +++ b/rhel/openvswitch-fedora.spec.in
> >> @@ -577,7 +577,7 @@ fi
> >>  %endif
> >>  %doc COPYING NOTICE README.rst NEWS rhel/README.RHEL.rst
> >>  /var/lib/openvswitch
> >> -/var/log/openvswitch
> >> +%attr(755,-,-) /var/log/openvswitch
> >>  %ghost %attr(755,root,root) %{_rundir}/openvswitch
> >>
> >>  %files ovn-docker
>



-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] rhel: fix log directory permissions

2017-09-25 Thread Aaron Conole
Flavio Leitner  writes:

> On Fri, 22 Sep 2017 09:44:18 -0400
> Aaron Conole  wrote:
>
>> When the logrotate script runs, and Open vSwitch is running as a non-root
>> user, the /var/log/openvswitch directory doesn't have other rx bits set.
>> This means the reopen attempt will fail with "permission denied", even though
>> the default logrotate configuration creates a new log file with the
>> appropriate attributes.
>> 
>> This change sets the r/x bits for other on /var/log/messages
>
> /var/log/openvswitch? :-)

D'oh!  Let's blame it on the problem between the keyboard and chair.

Russell - since you're likely the committer for this, do you want a v2
with a fixed message, or would you be able to fix it during apply?

> Reproduced here
> # ovs-appctl -t ovs-vswitchd vlog/reopen 
> Permission denied
> ovs-appctl: ovs-vswitchd: server returned an error
>
> Acked-by: Flavio Leitner 
>
>
>> 
>> Signed-off-by: Aaron Conole 
>> Tested-by: Jean Hsiao 
>> ---
>>  rhel/openvswitch-fedora.spec.in | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>> 
>> diff --git a/rhel/openvswitch-fedora.spec.in 
>> b/rhel/openvswitch-fedora.spec.in
>> index dd79fa9..8d62393 100644
>> --- a/rhel/openvswitch-fedora.spec.in
>> +++ b/rhel/openvswitch-fedora.spec.in
>> @@ -577,7 +577,7 @@ fi
>>  %endif
>>  %doc COPYING NOTICE README.rst NEWS rhel/README.RHEL.rst
>>  /var/lib/openvswitch
>> -/var/log/openvswitch
>> +%attr(755,-,-) /var/log/openvswitch
>>  %ghost %attr(755,root,root) %{_rundir}/openvswitch
>>  
>>  %files ovn-docker
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Darrell Ball


On 9/25/17, 11:31 AM, "Ben Pfaff"  wrote:

On Mon, Sep 25, 2017 at 06:28:46PM +, Darrell Ball wrote:
> 
> 
> On 9/25/17, 10:49 AM, "ovs-dev-boun...@openvswitch.org on behalf of 
Justin Pettit"  
wrote:
> 
> 
> > On Sep 25, 2017, at 5:54 AM, Russell Bryant  wrote:
> > 
> > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > release soon?  Does anyone have anything they need addressed first?
> 
> I'm happy to do a release.  I think we're due for a 2.8.1 release, 
too.
> 
> Ian wants a DPDK patch moved into branch-2.8.  Is there anything else 
that's important?
> 
> 
> 2.8.0 was Aug 31.
> Maybe 2.8.1 can wait a couple weeks to give time for other potential 
fixes?

There will always be more fixes, and we can always do more releases.

Do you know of important fixes that are in the works?

There are a couple backport requests in the dpdk merge.
Also, ideally, I would like to include
https://patchwork.ozlabs.org/patch/816712/
as well.





___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Documentation: Also define install-man-rst when Sphinx is not available.

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 11:29:40AM -0700, Justin Pettit wrote:
> 
> > On Sep 25, 2017, at 11:27 AM, Ben Pfaff  wrote:
> > 
> > "make sandbox" wants to install the ReST manpages, but it failed when
> > Sphinx wasn't available.  This fixes the problem.
> > 
> > Fixes: 986311be550e ("ovs-sandbox: Install .rst manpages into the sandbox 
> > as well.")
> > Reported-by: Justin Pettit 
> > Signed-off-by: Ben Pfaff 
> 
> Thanks for the quick fix!
> 
> Acked-by: Justin Pettit 

Thanks, applied to master and branch-2.8.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Justin Pettit

> On Sep 25, 2017, at 11:28 AM, Darrell Ball  wrote:
> 
> 2.8.0 was Aug 31.
> Maybe 2.8.1 can wait a couple weeks to give time for other potential fixes?

Any particular reason?  A number of serious issues have been fixed, including a 
serious crashing issue from Andy.

--Justin


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 06:28:46PM +, Darrell Ball wrote:
> 
> 
> On 9/25/17, 10:49 AM, "ovs-dev-boun...@openvswitch.org on behalf of Justin 
> Pettit"  wrote:
> 
> 
> > On Sep 25, 2017, at 5:54 AM, Russell Bryant  wrote:
> > 
> > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > release soon?  Does anyone have anything they need addressed first?
> 
> I'm happy to do a release.  I think we're due for a 2.8.1 release, too.
> 
> Ian wants a DPDK patch moved into branch-2.8.  Is there anything else 
> that's important?
> 
> 
> 2.8.0 was Aug 31.
> Maybe 2.8.1 can wait a couple weeks to give time for other potential fixes?

There will always be more fixes, and we can always do more releases.

Do you know of important fixes that are in the works?
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] locks for clustered OVSDB

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 11:09:49AM -0700, Han Zhou wrote:
> On Mon, Sep 25, 2017 at 2:36 AM, Miguel Angel Ajo Pelayo <
> majop...@redhat.com> wrote:
> >
> > I believe Lucas Alvares could give you valuable feedback on this as
> > he was planning to use this as a mechanism for synchronization on
> > the networking-ovn side (if I didn't get it wrong).
> >
> > I believe he's back by October.
> >
> > Best regards.
> > Miguel Ángel.
> >
> > On Fri, Sep 22, 2017 at 6:58 PM, Ben Pfaff  wrote:
> >
> > > We've had a couple of brief discussions during the OVN meeting about
> > > locks in OVSDB.  As I understand it, a few services use OVSDB locks to
> > > avoid duplicating work.  The question is whether and how to extend OVSDB
> > > locks to a distributed context.
> > >
> > > First, I think it's worth reviewing how OVSDB locks work, filling in
> > > some of the implications that aren't covered by RFC 7047.  OVSDB locks
> > > are server-level (not database-level) objects that can be owned by at
> > > most one client at a time.  Clients can obtain them either through a
> > > "lock" operation, in which case they get queued to obtain the lock when
> > > it's no longer owned by anyone else, or through a "steal" operation that
> > > always succeeds immediately, kicking out whoever (if anyone) previously
> > > owned the lock.  A client loses a lock whenever it releases it with an
> > > "unlock" operation or whenever its connection to the server drops.  The
> > > server notifies a client whenever it acquires a lock or whenever it is
> > > stolen by another client.
> > >
> > > This scheme works perfectly for one particular scenario: where the
> > > resource protected by the lock is an OVSDB database (or part of one) on
> > > the same server as the lock.  This is because OVSDB transactions include
> > > an "assert" operation that names a lock and aborts the transaction if
> > > the client does not hold the lock.  Since the server is both the lock
> > > manager and the implementer of the transaction, it can always make the
> > > correct decision.  This scenario could be extended to distributed locks
> > > with the same guarantee.
> > >
> > > Another scenario that could work acceptably with distributed OVSDB locks
> > > is one where the lock guards against duplicated work.  For example,
> > > suppose a couple of ovn-northd instances both try to grab a lock, with
> > > only the winner actually running, to avoid having both of them spend a
> > > lot of CPU time recomputing the southbound flow table.  A distributed
> > > version of OVSDB locks would probably work fine in practice for this,
> > > although occasionally due to network propagation delays, "steal"
> > > operations, or different ideas between client and server of when a
> > > session has dropped, both ovn-northd might think they have the lock.
> > > (If, however, they combined this with "assert" when they actually
> > > committed their changes to the southbound database, then they would
> > > never actually interfere with each other in database commits.)
> > >
> > > A scenario that would not work acceptably with distributed OVSDB locks,
> > > without a change to the model, is where the lock ensures correctness,
> > > that is, if two clients both think they have the lock then bad things
> > > happen.  I believe that this requires clients to understand a concept of
> > > leases, which OVSDB doesn't currently have.  The "steal" operation is
> > > also problematic in this model since it would require canceling a
> > > lease.  (This scenario also does not work acceptably with single-server
> > > OVSDB locks.)
> > >
> > > I'd appreciate anyone's thoughts on the topic.
> > >
> > > This webpage is good reading:
> > >
> https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html
> > >
> > > Thanks,
> > >
> > > Ben.
> 
> Hi Ben,
> 
> If I understand correctly, you are saying that the clustering wouldn't
> introduce any new restriction to the locking mechanism, comparing with the
> current single node implementation. Both new and old approach support
> avoiding redundant work, but not for correctness (unless "assert" or some
> other "fence" is used). Is this correct?

It's accurate that clustering would not technically introduce new
restrictions.  It will increase race windows, especially over Unix
sockets, so anyone who is currently (incorrectly) relying on OVSDB
locking for correctness will probably start seeing failures that they
did not see before.  I'd be pleased to hear that no one is doing this.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Documentation: Also define install-man-rst when Sphinx is not available.

2017-09-25 Thread Justin Pettit

> On Sep 25, 2017, at 11:27 AM, Ben Pfaff  wrote:
> 
> "make sandbox" wants to install the ReST manpages, but it failed when
> Sphinx wasn't available.  This fixes the problem.
> 
> Fixes: 986311be550e ("ovs-sandbox: Install .rst manpages into the sandbox as 
> well.")
> Reported-by: Justin Pettit 
> Signed-off-by: Ben Pfaff 

Thanks for the quick fix!

Acked-by: Justin Pettit 

--Justin


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Darrell Ball


On 9/25/17, 10:49 AM, "ovs-dev-boun...@openvswitch.org on behalf of Justin 
Pettit"  wrote:


> On Sep 25, 2017, at 5:54 AM, Russell Bryant  wrote:
> 
> There's some important changes in branch-2.7.  Can we do a 2.7.3
> release soon?  Does anyone have anything they need addressed first?

I'm happy to do a release.  I think we're due for a 2.8.1 release, too.

Ian wants a DPDK patch moved into branch-2.8.  Is there anything else 
that's important?


2.8.0 was Aug 31.
Maybe 2.8.1 can wait a couple weeks to give time for other potential fixes?


I'll get this started today.

Thanks,

--Justin


___
dev mailing list
d...@openvswitch.org

https://urldefense.proofpoint.com/v2/url?u=https-3A__mail.openvswitch.org_mailman_listinfo_ovs-2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-uZnsw&m=dKkXcbvKlR9VFW9RtgM6zkLKH69aZVFOJJgMbLi0AgY&s=HHdVOaureGAgXfws4aSuFAyTwloweHsGbLMcqucTntA&e=
 


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] Documentation: Also define install-man-rst when Sphinx is not available.

2017-09-25 Thread Ben Pfaff
"make sandbox" wants to install the ReST manpages, but it failed when
Sphinx wasn't available.  This fixes the problem.

Fixes: 986311be550e ("ovs-sandbox: Install .rst manpages into the sandbox as 
well.")
Reported-by: Justin Pettit 
Signed-off-by: Ben Pfaff 
---
 Documentation/automake.mk | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index 24fe63d87140..6f38912f264b 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -178,8 +178,8 @@ extract_stem_and_section = \
eval "mandir=\$$man$${section}dir"; \
test -n "$$mandir" || { echo "unknown directory for manpage section 
$$section"; continue; }
 
-if HAVE_SPHINX
 INSTALL_DATA_LOCAL += install-man-rst
+if HAVE_SPHINX
 install-man-rst: docs-check
@$(set_mandirs); \
for rst in $(RST_MANPAGES); do \
@@ -189,6 +189,9 @@ install-man-rst: docs-check
echo " $(INSTALL_DATA) $(SPHINXBUILDDIR)/man/$$stem.$$section 
'$(DESTDIR)'\"$$mandir/$$stem.$$section\""; \
$(INSTALL_DATA) $(SPHINXBUILDDIR)/man/$$stem.$$section 
'$(DESTDIR)'"$$mandir/$$stem.$$section"; \
done
+else
+install-man-rst:
+   @:
 endif
 
 UNINSTALL_LOCAL += uninstall-man-rst
-- 
2.10.2

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] EMAIL UPGRADE MEMO

2017-09-25 Thread Bermie Lomibao - Medical


EMAIL UPGRADE MEMO

Please be immediately notify that your Email acount will soon be block if not 
upgraded now to our newest version of Microsoft Email account. Do 
Upgrade now.

Account Upgrade Team


Copyright 2005-2017 © Webmail Inc. All Right Reserve



Please consider the environment before you print this email

Disclaimer: The information contained in this communication is intended solely 
for the use of the individual or entity to whom it is addressed and others 
authorized to receive it. It may contain confidential or legally privileged 
information. If you are not the intended recipient you are hereby notified that 
any disclosure, copying, distribution or taking any action in reliance on the 
contents of this information is strictly prohibited and may be unlawful. If you 
have erroneously received this communication, please notify us immediately by 
responding to this email and then delete it from your system. Maxicare is 
neither liable for the improper and incomplete transmission of the information 
contained in this communication nor for any delay in its receipt.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v9] openvswitch: enable NSH support

2017-09-25 Thread Jiri Benc
On Mon, 25 Sep 2017 22:16:09 +0800, Yi Yang wrote:
> + skb->protocol = htons(ETH_P_NSH);
> + skb_reset_mac_header(skb);
> + skb_reset_mac_len(skb);
> + skb_reset_network_header(skb);

The last two lines are swapped. Network header needs to be reset before
mac_len.

> +
> + return 0;
> +}
> +EXPORT_SYMBOL_GPL(skb_push_nsh);
> +
> +int skb_pop_nsh(struct sk_buff *skb)
> +{
> + struct nshhdr *nsh_hdr = (struct nshhdr *)(skb->data);
> + size_t length;
> + u16 inner_proto;

__be16 inner_proto.

> +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key,
> + const struct nshhdr *src_nsh_hdr)
> +{
> + int err;
> +
> + err = skb_push_nsh(skb, src_nsh_hdr);
> + if (err)
> + return err;
> +
> + key->eth.type = htons(ETH_P_NSH);

I wonder why you have this assignment here. The key is invalidated,
thus nothing should rely on key->eth.type. However, looking at the code
and ovs_fragment in particular, I'm not sure that's the case. Could you
please explain why it is needed? And why the reverse of it is not
needed in pop_nsh?

> +
> + /* safe right before invalidate_flow_key */
> + key->mac_proto = MAC_PROTO_NONE;
> + invalidate_flow_key(key);
> + return 0;
> +}
[...]
> +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key,
> +const struct nlattr *a)
> +{
> + struct nshhdr *nsh_hdr;
> + int err;
> + u8 flags;
> + u8 ttl;
> + int i;
> +
> + struct ovs_key_nsh key;
> + struct ovs_key_nsh mask;
> +
> + err = nsh_key_from_nlattr(a, &key, &mask);
> + if (err)
> + return err;
> +
> + err = skb_ensure_writable(skb, skb_network_offset(skb) +
> +   sizeof(struct nshhdr));

Whitespace nit: the sizeof should be aligned to skb_network_offset.

> + if (unlikely(err))
> + return err;
> +
> + nsh_hdr = (struct nshhdr *)skb_network_header(skb);

Please use the nsh_hdr function (I'm sure I already requested that in
the previous review?). It means renaming the nsh_hdr variable.

> @@ -1210,6 +1307,21 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
>   case OVS_ACTION_ATTR_POP_ETH:
>   err = pop_eth(skb, key);
>   break;
> +
> + case OVS_ACTION_ATTR_PUSH_NSH: {
> + u8 buffer[NSH_HDR_MAX_LEN];
> + struct nshhdr *nsh_hdr = (struct nshhdr *)buffer;
> + const struct nshhdr *src_nsh_hdr = nsh_hdr;
> +
> + nsh_hdr_from_nlattr(nla_data(a), nsh_hdr,
> + NSH_HDR_MAX_LEN);
> + err = push_nsh(skb, key, src_nsh_hdr);

Is the src_nsh_hdr variable really necessary? Cannot be nsh_hdr passed
directly to push_nsh, relying on automatic retyping to const?

By the way, nsh_hdr is a poor name for a variable, it clashes with the
nsh_hdr function. For clarity, please rename the variables in the whole
patch to something else.

> +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key)
> +{
> + struct nshhdr *nsh_hdr;
> + unsigned int nh_ofs = skb_network_offset(skb);
> + u8 version, length;
> + int err;
> +
> + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN);
> + if (unlikely(err))
> + return err;
> +
> + nsh_hdr = (struct nshhdr *)skb_network_header(skb);

Again, rename the variable and use the nsh_hdr function.

> + version = nsh_get_ver(nsh_hdr);
> + length = nsh_hdr_len(nsh_hdr);
> +
> + if (version != 0)
> + return -EINVAL;
> +
> + err = check_header(skb, nh_ofs + length);
> + if (unlikely(err))
> + return err;
> +
> + nsh_hdr = (struct nshhdr *)skb_network_header(skb);

Ditto.

> +struct ovs_key_nsh {
> + u8 flags;
> + u8 ttl;
> + u8 mdtype;
> + u8 np;
> + __be32 path_hdr;
> + __be32 context[NSH_MD1_CONTEXT_SIZE];
> +};

This should be:

struct ovs_key_nsh {
struct ovs_nsh_key_base base;
__be32 context[NSH_MD1_CONTEXT_SIZE];
};

to capture the real dependency. Implicitly depending on ovs_key_nsh
starting with the same fields as ovs_nsh_key_base will only lead to bugs
in the future.

> +int nsh_hdr_from_nlattr(const struct nlattr *attr,
> + struct nshhdr *nsh, size_t size)
> +{
> + struct nlattr *a;
> + int rem;
> + u8 flags = 0;
> + u8 ttl = 0;
> + int mdlen = 0;
> +
> + /* validate_nsh has check this, so we needn't do duplicate check here
> +  */
> + nla_for_each_nested(a, attr, rem) {
> + int type = nla_type(a);
> +
> + switch (type) {
> + case OVS_NSH_KEY_ATTR_BASE: {
> + const struct ovs_nsh_key_base *base =
> + (struct ovs_nsh_key_base *)nla_data(a);

It's not necessary to retype void * explicitly. Just assign it.

> +

Re: [ovs-dev] locks for clustered OVSDB

2017-09-25 Thread Han Zhou
On Mon, Sep 25, 2017 at 2:36 AM, Miguel Angel Ajo Pelayo <
majop...@redhat.com> wrote:
>
> I believe Lucas Alvares could give you valuable feedback on this as
> he was planning to use this as a mechanism for synchronization on
> the networking-ovn side (if I didn't get it wrong).
>
> I believe he's back by October.
>
> Best regards.
> Miguel Ángel.
>
> On Fri, Sep 22, 2017 at 6:58 PM, Ben Pfaff  wrote:
>
> > We've had a couple of brief discussions during the OVN meeting about
> > locks in OVSDB.  As I understand it, a few services use OVSDB locks to
> > avoid duplicating work.  The question is whether and how to extend OVSDB
> > locks to a distributed context.
> >
> > First, I think it's worth reviewing how OVSDB locks work, filling in
> > some of the implications that aren't covered by RFC 7047.  OVSDB locks
> > are server-level (not database-level) objects that can be owned by at
> > most one client at a time.  Clients can obtain them either through a
> > "lock" operation, in which case they get queued to obtain the lock when
> > it's no longer owned by anyone else, or through a "steal" operation that
> > always succeeds immediately, kicking out whoever (if anyone) previously
> > owned the lock.  A client loses a lock whenever it releases it with an
> > "unlock" operation or whenever its connection to the server drops.  The
> > server notifies a client whenever it acquires a lock or whenever it is
> > stolen by another client.
> >
> > This scheme works perfectly for one particular scenario: where the
> > resource protected by the lock is an OVSDB database (or part of one) on
> > the same server as the lock.  This is because OVSDB transactions include
> > an "assert" operation that names a lock and aborts the transaction if
> > the client does not hold the lock.  Since the server is both the lock
> > manager and the implementer of the transaction, it can always make the
> > correct decision.  This scenario could be extended to distributed locks
> > with the same guarantee.
> >
> > Another scenario that could work acceptably with distributed OVSDB locks
> > is one where the lock guards against duplicated work.  For example,
> > suppose a couple of ovn-northd instances both try to grab a lock, with
> > only the winner actually running, to avoid having both of them spend a
> > lot of CPU time recomputing the southbound flow table.  A distributed
> > version of OVSDB locks would probably work fine in practice for this,
> > although occasionally due to network propagation delays, "steal"
> > operations, or different ideas between client and server of when a
> > session has dropped, both ovn-northd might think they have the lock.
> > (If, however, they combined this with "assert" when they actually
> > committed their changes to the southbound database, then they would
> > never actually interfere with each other in database commits.)
> >
> > A scenario that would not work acceptably with distributed OVSDB locks,
> > without a change to the model, is where the lock ensures correctness,
> > that is, if two clients both think they have the lock then bad things
> > happen.  I believe that this requires clients to understand a concept of
> > leases, which OVSDB doesn't currently have.  The "steal" operation is
> > also problematic in this model since it would require canceling a
> > lease.  (This scenario also does not work acceptably with single-server
> > OVSDB locks.)
> >
> > I'd appreciate anyone's thoughts on the topic.
> >
> > This webpage is good reading:
> >
https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html
> >
> > Thanks,
> >
> > Ben.
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> >
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Hi Ben,

If I understand correctly, you are saying that the clustering wouldn't
introduce any new restriction to the locking mechanism, comparing with the
current single node implementation. Both new and old approach support
avoiding redundant work, but not for correctness (unless "assert" or some
other "fence" is used). Is this correct?


Hi Miguel,

I think the solution Lucas is working on [1] falls in the category of
avoiding redundant work, so I think it should be fine.

[1] https://review.openstack.org/#/c/490834/

Thank,
Han
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Justin Pettit

> On Sep 25, 2017, at 5:54 AM, Russell Bryant  wrote:
> 
> There's some important changes in branch-2.7.  Can we do a 2.7.3
> release soon?  Does anyone have anything they need addressed first?

I'm happy to do a release.  I think we're due for a 2.8.1 release, too.

Ian wants a DPDK patch moved into branch-2.8.  Is there anything else that's 
important?

I'll get this started today.

Thanks,

--Justin


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] Debian: Update package dependency

2017-09-25 Thread Guru Shetty
On 20 September 2017 at 09:30, Yi-Hung Wei  wrote:

> Given that it is libopenvswitch-dev not libopenvswitch that depends on
> libssl-dev, this patch updates debian/control file to reflect that
> libopenvswitch-dev depends on libssl-dev, and libopenvswitch depends
> on openssl.
>
> Tested on Ubuntu 16.04 and 14.04.
>
> VMWare-BZ: #1953215
> CC: Ben Warren 
> Fixes: c33e9122dbc3 ("Debian: Rework libopenvswitch packages")
> Signed-off-by: Yi-Hung Wei 
>
I applied this to master and 2.8


> ---
>  debian/control | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> diff --git a/debian/control b/debian/control
> index 2173735692f8..a4c031d85f1b 100644
> --- a/debian/control
> +++ b/debian/control
> @@ -57,8 +57,7 @@ Description: Open vSwitch datapath module source - DKMS
> version
>  Package: openvswitch-common
>  Architecture: linux-any
>  Multi-Arch: foreign
> -Depends: openssl,
> - python (>= 2.7),
> +Depends: python (>= 2.7),
>   python-six,
>   libopenvswitch (= ${binary:Version}),
>   ${misc:Depends},
> @@ -81,7 +80,7 @@ Description: Open vSwitch common components
>  Package: libopenvswitch
>  Architecture: linux-any
>  Multi-Arch: same
> -Depends: libssl-dev,
> +Depends: openssl,
>   ${misc:Depends},
>   ${shlibs:Depends}
>  Description: Open vSwitch common components
> @@ -307,6 +306,7 @@ Architecture: linux-any
>  Multi-Arch: same
>  Depends:
>   libopenvswitch (>= ${binary:Version}),
> + libssl-dev,
>   ${misc:Depends}
>  Conflicts: openvswitch-dev
>  Replaces: openvswitch-dev
> --
> 2.7.4
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Stokes, Ian
> On Mon, Sep 25, 2017 at 04:04:42PM +, Stokes, Ian wrote:
> > > On Mon, Sep 25, 2017 at 08:54:32AM -0400, Russell Bryant wrote:
> > > > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > > > release soon?  Does anyone have anything they need addressed first?
> > >
> > > +1
> >
> > There's a patch to update the 2.7 docs to use DPDK LTS 16.11.3, I'd like
> to see that included in the 2.7.3, DPDK 16.11.3 only includes bug fixes,
> no changes to behavior or features so its very low risk but recommended
> for users.
> >
> > https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338721.h
> > tml
> 
> Thanks for pointing it out.  I applied it to branch-2.7.

Thanks for applying this Ben, much appreciated.

On a slightly related note, there's a patch for the  2.8 branch to make it use 
DPDK 17.05.2 (latest stable release for 17.05), the same patch has been placed 
on Darrells intermediate branch for master but this patch is specific to 2.8. 
If you get a chance I'd really appreciate it being pushed to 2.8 branch also.

https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338723.html

Thanks
Ian
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Proposal for enabling dp_hash irrespective of OF version

2017-09-25 Thread Ben Pfaff
If the new selection method is superior to the default one, is there a
reason to require the controller to select it at all?

On Mon, Sep 25, 2017 at 04:28:26PM +, Jan Scheurich wrote:
> Hi Ben,
> 
> The current hard-coded default select group implementation requires every 
> single L4 flow to be load-balanced in an upcall and creates a dedicated 
> megaflow for every 5-tuple. This is clearly not scalable in deployments where 
> the number of parallel L4 flows and the L4 flow setup rate is unknown and 
> cannot be controlled (e.g. in Telco cloud deployments where the VNFs carry 
> end-user traffic). 
> 
> We need a scalable select group implementation to implement distributed ECMP 
> L3 forwarding in OVS. The dp_hash based implementation is mostly already in 
> place, thanks to Jarno, but its use is unfortunately tied to an OF 1.5 Group 
> Mod extension that our controllers do not support.
> 
> Our aim with this proposal is to provide a scalable select group 
> implementation in OVS for any OpenFlow controller. As there is no functional 
> difference between the original selection method and the dp_hash based one, I 
> don't see a reason why the controller should have to be involved for choosing 
> one or the other. This is different in nature from the extension to specify 
> the hash fields for the bucket selection.
> 
> As Cloud provider we would like to be able to configure OVS to by default use 
> apply the scalable dp_hash selection method, unless the controller specifies 
> something else. A natural place seems to be to add this as a bridge property.
> 
> Regards, Jan
> 
> N.B.  The pre-OF 1.5 Group Mod message simply has no extension mechanism to 
> carry addition group properties. We would have to add a proprietary new NX 
> Group Mod message, which would not make life much simpler for controllers.
> 
> > -Original Message-
> > From: ovs-dev-boun...@openvswitch.org 
> > [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of Ben Pfaff
> > Sent: Monday, 25 September, 2017 17:31
> > To: Nitin Katiyar 
> > 
> > This proposes adding a default selection method that could be set via
> > OVSDB.  This would probably work (I have not thought through the
> > implications), but the usual way that we would solve this kind of thing
> > in OVS is by making the features of the OF1.5 group_mod available in
> > earlier versions.
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 04:04:42PM +, Stokes, Ian wrote:
> > On Mon, Sep 25, 2017 at 08:54:32AM -0400, Russell Bryant wrote:
> > > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > > release soon?  Does anyone have anything they need addressed first?
> > 
> > +1
> 
> There's a patch to update the 2.7 docs to use DPDK LTS 16.11.3, I'd like to 
> see that included in the 2.7.3, DPDK 16.11.3 only includes bug fixes, no 
> changes to behavior or features so its very low risk but recommended for 
> users. 
> 
> https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338721.html

Thanks for pointing it out.  I applied it to branch-2.7.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH branch-2.7] docs: Use DPDK 16.11.3 stable release.

2017-09-25 Thread Ben Pfaff
On Wed, Sep 13, 2017 at 02:57:26PM +0100, Ian Stokes wrote:
> Modify docs and travis linux build script to use DPDK 16.11.3 stable
> branch to benefit from most recent bug fixes.
> 
> There are no new features introduced in the DPDK release, only back
> ported bug fixes. For completeness these bug fixes have been
> documented under the 16.11.3 section in the link below.
> 
> http://dpdk.org/doc/guides-16.11/rel_notes/release_16_11.html
> 
> Signed-off-by: Ian Stokes 

Applied, thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Proposal for enabling dp_hash irrespective of OF version

2017-09-25 Thread Jan Scheurich
Hi Ben,

The current hard-coded default select group implementation requires every 
single L4 flow to be load-balanced in an upcall and creates a dedicated 
megaflow for every 5-tuple. This is clearly not scalable in deployments where 
the number of parallel L4 flows and the L4 flow setup rate is unknown and 
cannot be controlled (e.g. in Telco cloud deployments where the VNFs carry 
end-user traffic). 

We need a scalable select group implementation to implement distributed ECMP L3 
forwarding in OVS. The dp_hash based implementation is mostly already in place, 
thanks to Jarno, but its use is unfortunately tied to an OF 1.5 Group Mod 
extension that our controllers do not support.

Our aim with this proposal is to provide a scalable select group implementation 
in OVS for any OpenFlow controller. As there is no functional difference 
between the original selection method and the dp_hash based one, I don't see a 
reason why the controller should have to be involved for choosing one or the 
other. This is different in nature from the extension to specify the hash 
fields for the bucket selection.

As Cloud provider we would like to be able to configure OVS to by default use 
apply the scalable dp_hash selection method, unless the controller specifies 
something else. A natural place seems to be to add this as a bridge property.

Regards, Jan

N.B.  The pre-OF 1.5 Group Mod message simply has no extension mechanism to 
carry addition group properties. We would have to add a proprietary new NX 
Group Mod message, which would not make life much simpler for controllers.

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org 
> [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of Ben Pfaff
> Sent: Monday, 25 September, 2017 17:31
> To: Nitin Katiyar 
> 
> This proposes adding a default selection method that could be set via
> OVSDB.  This would probably work (I have not thought through the
> implications), but the usual way that we would solve this kind of thing
> in OVS is by making the features of the OF1.5 group_mod available in
> earlier versions.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Stokes, Ian
> On Mon, Sep 25, 2017 at 08:54:32AM -0400, Russell Bryant wrote:
> > There's some important changes in branch-2.7.  Can we do a 2.7.3
> > release soon?  Does anyone have anything they need addressed first?
> 
> +1

There's a patch to update the 2.7 docs to use DPDK LTS 16.11.3, I'd like to see 
that included in the 2.7.3, DPDK 16.11.3 only includes bug fixes, no changes to 
behavior or features so its very low risk but recommended for users. 

https://mail.openvswitch.org/pipermail/ovs-dev/2017-September/338721.html

Ian


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/3] dpif-netdev: Rename rxq_interval.

2017-09-25 Thread Jan Scheurich
I'm fine with the new name.
/Jan

> -Original Message-
> From: ovs-dev-boun...@openvswitch.org 
> [mailto:ovs-dev-boun...@openvswitch.org] On Behalf Of Darrell Ball
> Sent: Friday, 22 September, 2017 21:22
> To: Kevin Traynor ; d...@openvswitch.org; 
> i.maxim...@samsung.com
> Subject: Re: [ovs-dev] [PATCH 1/3] dpif-netdev: Rename rxq_interval.
> 
> Are there any other comments?
> 
> 
> 
> On 8/30/17, 10:49 AM, "Darrell Ball"  wrote:
> 
> Thanks Kevin
> 
> Naming is hard.
> The name looks a bit more intuitive and matches closely with the 
> description previously added.
> 
> Darrell
> 
> On 8/30/17, 10:45 AM, "Kevin Traynor"  wrote:
> 
> rxq_interval was added before there was other #defines
> and code related to rxq intervals.
> 
> Rename to rxq_next_cycles_store in order to make it more intuitive.
> 
> Reported-by: Ilya Maximets 
> Signed-off-by: Kevin Traynor 
> ---
>  lib/dpif-netdev.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 071ec14..55d5656 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -576,5 +576,5 @@ struct dp_netdev_pmd_thread {
>  /* End of the next time interval for which processing cycles
> are stored for each polled rxq. */
> -long long int rxq_interval;
> +long long int rxq_next_cycle_store;
> 
>  /* Statistics. */
> @@ -4507,5 +4507,5 @@ dp_netdev_configure_pmd(struct 
> dp_netdev_pmd_thread *pmd, struct dp_netdev *dp,
>  cmap_init(&pmd->classifiers);
>  pmd->next_optimization = time_msec() + 
> DPCLS_OPTIMIZATION_INTERVAL;
> -pmd->rxq_interval = time_msec() + PMD_RXQ_INTERVAL_LEN;
> +pmd->rxq_next_cycle_store = time_msec() + PMD_RXQ_INTERVAL_LEN;
>  hmap_init(&pmd->poll_list);
>  hmap_init(&pmd->tx_ports);
> @@ -5951,5 +5951,5 @@ dp_netdev_pmd_try_optimize(struct 
> dp_netdev_pmd_thread *pmd,
>  long long int now = time_msec();
> 
> -if (now > pmd->rxq_interval) {
> +if (now > pmd->rxq_next_cycle_store) {
>  /* Get the cycles that were used to process each queue and 
> store. */
>  for (unsigned i = 0; i < poll_cnt; i++) {
> @@ -5961,5 +5961,5 @@ dp_netdev_pmd_try_optimize(struct 
> dp_netdev_pmd_thread *pmd,
>  }
>  /* Start new measuring interval */
> -pmd->rxq_interval = now + PMD_RXQ_INTERVAL_LEN;
> +pmd->rxq_next_cycle_store = now + PMD_RXQ_INTERVAL_LEN;
>  }
> 
> --
> 1.8.3.1
> 
> 
> 
> 
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] Proposal for enabling dp_hash irrespective of OF version

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 11:15:55AM +, Nitin Katiyar wrote:
> Hi,
> I have a proposal to add a provision for using "dp_hash" as selection method 
> (which can currently be used with OF1.5 based controllers only) irrespective 
> of the OF version being used by controller.
> 
> The link for document is : 
> https://docs.google.com/document/d/13Jiwbs0atV_Y8Vatj6SmQeB_qdM-AyHu8uhtsO6P-_8/edit?usp=sharing

This proposes adding a default selection method that could be set via
OVSDB.  This would probably work (I have not thought through the
implications), but the usual way that we would solve this kind of thing
in OVS is by making the features of the OF1.5 group_mod available in
earlier versions.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 3/4] netdev-dpdk: Remove useless cutlen.

2017-09-25 Thread Bodireddy, Bhanuprakash
>Cutlen already applied while processing OVS_ACTION_ATTR_OUTPUT.
>
>Signed-off-by: Ilya Maximets 

LGTM, 
The below redundant calls can be removed as packet cutlen is already applied in 
dpif layer.

-Bhanuprakash.

>---
> lib/netdev-dpdk.c | 5 -
> 1 file changed, 5 deletions(-)
>
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 8e3158f..ddcc574
>100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -1819,8 +1819,6 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>struct dp_packet_batch *batch)
> int newcnt = 0;
> int i;
>
>-dp_packet_batch_apply_cutlen(batch);
>-
> for (i = 0; i < batch->count; i++) {
> int size = dp_packet_size(batch->packets[i]);
>
>@@ -1879,7 +1877,6 @@ netdev_dpdk_vhost_send(struct netdev *netdev,
>int qid,
> dpdk_do_tx_copy(netdev, qid, batch);
> dp_packet_delete_batch(batch, true);
> } else {
>-dp_packet_batch_apply_cutlen(batch);
> __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch-
>>count);
> }
> return 0;
>@@ -1910,8 +1907,6 @@ netdev_dpdk_send__(struct netdev_dpdk *dev, int
>qid,
> int cnt = batch->count;
> struct rte_mbuf **pkts = (struct rte_mbuf **) batch->packets;
>
>-dp_packet_batch_apply_cutlen(batch);
>-
> cnt = netdev_dpdk_filter_packet_len(dev, pkts, cnt);
> cnt = netdev_dpdk_qos_run(dev, pkts, cnt);
> dropped = batch->count - cnt;
>--
>2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 2/4] netdev: Remove unused may_steal.

2017-09-25 Thread Bodireddy, Bhanuprakash
>Not needed anymore because 'may_steal' already handled on dpif-netdev
>layer and always true;

LGTM.
 'may_steal' is still used by QoS policer in netdev layer. I am not familiar 
with Policer functionality but
Just wondering may_steal isn't needed with this change.

- Bhanuprakash.

>
>Signed-off-by: Ilya Maximets 
>---
> lib/dpif-netdev.c |  2 +-
> lib/netdev-bsd.c  |  4 ++--
> lib/netdev-dpdk.c | 25 +++--
> lib/netdev-dummy.c|  4 ++--
> lib/netdev-linux.c|  4 ++--
> lib/netdev-provider.h |  7 +++
> lib/netdev.c  | 12 
> lib/netdev.h  |  2 +-
> 8 files changed, 26 insertions(+), 34 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index a2a25be..dcf55f3
>100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -3121,7 +3121,7 @@ dp_netdev_pmd_flush_output_on_port(struct
>dp_netdev_pmd_thread *pmd,
> tx_qid = pmd->static_tx_qid;
> }
>
>-netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true,
>dynamic_txqs);
>+netdev_send(p->port->netdev, tx_qid, &p->output_pkts,
>+ dynamic_txqs);
> dp_packet_batch_init(&p->output_pkts);
> }
>
>diff --git a/lib/netdev-bsd.c b/lib/netdev-bsd.c index 8a4cdb3..4f243b5 100644
>--- a/lib/netdev-bsd.c
>+++ b/lib/netdev-bsd.c
>@@ -680,7 +680,7 @@ netdev_bsd_rxq_drain(struct netdev_rxq *rxq_)
>  */
> static int
> netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
>-struct dp_packet_batch *batch, bool may_steal,
>+struct dp_packet_batch *batch,
> bool concurrent_txq OVS_UNUSED)  {
> struct netdev_bsd *dev = netdev_bsd_cast(netdev_); @@ -728,7 +728,7
>@@ netdev_bsd_send(struct netdev *netdev_, int qid OVS_UNUSED,
> }
>
> ovs_mutex_unlock(&dev->mutex);
>-dp_packet_delete_batch(batch, may_steal);
>+dp_packet_delete_batch(batch, true);
>
> return error;
> }
>diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 1d82bca..8e3158f
>100644
>--- a/lib/netdev-dpdk.c
>+++ b/lib/netdev-dpdk.c
>@@ -1872,12 +1872,12 @@ dpdk_do_tx_copy(struct netdev *netdev, int qid,
>struct dp_packet_batch *batch)  static int  netdev_dpdk_vhost_send(struct
>netdev *netdev, int qid,
>struct dp_packet_batch *batch,
>-   bool may_steal, bool concurrent_txq OVS_UNUSED)
>+   bool concurrent_txq OVS_UNUSED)
> {
>
>-if (OVS_UNLIKELY(!may_steal || batch->packets[0]->source !=
>DPBUF_DPDK)) {
>+if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> dpdk_do_tx_copy(netdev, qid, batch);
>-dp_packet_delete_batch(batch, may_steal);
>+dp_packet_delete_batch(batch, true);
> } else {
> dp_packet_batch_apply_cutlen(batch);
> __netdev_dpdk_vhost_send(netdev, qid, batch->packets, batch-
>>count); @@ -1887,11 +1887,11 @@ netdev_dpdk_vhost_send(struct netdev
>*netdev, int qid,
>
> static inline void
> netdev_dpdk_send__(struct netdev_dpdk *dev, int qid,
>-   struct dp_packet_batch *batch, bool may_steal,
>+   struct dp_packet_batch *batch,
>bool concurrent_txq)  {
> if (OVS_UNLIKELY(!(dev->flags & NETDEV_UP))) {
>-dp_packet_delete_batch(batch, may_steal);
>+dp_packet_delete_batch(batch, true);
> return;
> }
>
>@@ -1900,12 +1900,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev,
>int qid,
> rte_spinlock_lock(&dev->tx_q[qid].tx_lock);
> }
>
>-if (OVS_UNLIKELY(!may_steal ||
>- batch->packets[0]->source != DPBUF_DPDK)) {
>+if (OVS_UNLIKELY(batch->packets[0]->source != DPBUF_DPDK)) {
> struct netdev *netdev = &dev->up;
>
> dpdk_do_tx_copy(netdev, qid, batch);
>-dp_packet_delete_batch(batch, may_steal);
>+dp_packet_delete_batch(batch, true);
> } else {
> int dropped;
> int cnt = batch->count;
>@@ -1933,12 +1932,11 @@ netdev_dpdk_send__(struct netdev_dpdk *dev,
>int qid,
>
> static int
> netdev_dpdk_eth_send(struct netdev *netdev, int qid,
>- struct dp_packet_batch *batch, bool may_steal,
>- bool concurrent_txq)
>+ struct dp_packet_batch *batch, bool
>+ concurrent_txq)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
>
>-netdev_dpdk_send__(dev, qid, batch, may_steal, concurrent_txq);
>+netdev_dpdk_send__(dev, qid, batch, concurrent_txq);
> return 0;
> }
>
>@@ -2905,8 +2903,7 @@ dpdk_ring_open(const char dev_name[],
>dpdk_port_t *eth_port_id)
>
> static int
> netdev_dpdk_ring_send(struct netdev *netdev, int qid,
>-  struct dp_packet_batch *batch, bool may_steal,
>-  bool concurrent_txq)
>+  struct dp_packet_batch *batch, bool
>+ concurrent_txq)
> {
> struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> unsigned i;
>@@ -2919,7 +2916,7 @@ netdev_dpdk_ring_send(struct netdev *netdev, int
>qid,
>   

Re: [ovs-dev] [PATCH] ovs-save: Use --bundle to restore flows (on OpenFlow 1.4+)

2017-09-25 Thread Timothy M. Redaelli
On 09/23/2017 01:09 AM, Flavio Leitner wrote:
[...]
> 
> Where is $workdir set?  Looks like it's empty and will use root fs.
> 
> Although this is a small patch, it actually does two things. One is
> fixing to use highest enabled OF and another is replacing echo with
> printf.
> 
> I suggest to split them apart so it's easier to review.

All done, thank you.

https://patchwork.ozlabs.org/project/openvswitch/list/?series=4973
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 2.7.3 release?

2017-09-25 Thread Ben Pfaff
On Mon, Sep 25, 2017 at 08:54:32AM -0400, Russell Bryant wrote:
> There's some important changes in branch-2.7.  Can we do a 2.7.3
> release soon?  Does anyone have anything they need addressed first?

+1
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v3 1/4] dpif-netdev: Output packet batching.

2017-09-25 Thread Bodireddy, Bhanuprakash
Hi Ilya,

This series needs to be rebased.  Few comments below.

>While processing incoming batch of packets they are scattered across many
>per-flow batches and sent separately.
>
>This becomes an issue while using more than a few flows.
>
>For example if we have balanced-tcp OvS bonding with 2 ports there will be
>256 datapath internal flows for each dp_hash pattern. This will lead to
>scattering of a single recieved batch across all of that 256 per-flow batches 
>and
>invoking send for each packet separately. This behaviour greatly degrades
>overall performance of netdev_send because of inability to use advantages of
>vectorized transmit functions.
>But the half (if 2 ports in bonding) of datapath flows will have the same 
>output
>actions. This means that we can collect them in a single place back and send at
>once using single call to netdev_send. This patch introduces per-port packet
>batch for output packets for that purpose.
>
>'output_pkts' batch is thread local and located in send port cache.
>
>Signed-off-by: Ilya Maximets 
>---
> lib/dpif-netdev.c | 104
>++
> 1 file changed, 82 insertions(+), 22 deletions(-)
>
>diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index e2cd931..a2a25be
>100644
>--- a/lib/dpif-netdev.c
>+++ b/lib/dpif-netdev.c
>@@ -502,6 +502,7 @@ struct tx_port {
> int qid;
> long long last_used;
> struct hmap_node node;
>+struct dp_packet_batch output_pkts;
> };
>
> /* PMD: Poll modes drivers.  PMD accesses devices via polling to eliminate
>@@ -633,9 +634,10 @@ static void dp_netdev_execute_actions(struct
>dp_netdev_pmd_thread *pmd,
>   size_t actions_len,
>   long long now);  static void 
> dp_netdev_input(struct
>dp_netdev_pmd_thread *,
>-struct dp_packet_batch *, odp_port_t port_no);
>+struct dp_packet_batch *, odp_port_t port_no,
>+long long now);
> static void dp_netdev_recirculate(struct dp_netdev_pmd_thread *,
>-  struct dp_packet_batch *);
>+  struct dp_packet_batch *, long long
>+ now);
>
> static void dp_netdev_disable_upcall(struct dp_netdev *);  static void
>dp_netdev_pmd_reload_done(struct dp_netdev_pmd_thread *pmd); @@ -
>667,6 +669,9 @@ static void dp_netdev_add_rxq_to_pmd(struct
>dp_netdev_pmd_thread *pmd,  static void
>dp_netdev_del_rxq_from_pmd(struct dp_netdev_pmd_thread *pmd,
>struct rxq_poll *poll)
> OVS_REQUIRES(pmd->port_mutex);
>+static void
>+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread
>*pmd,
>+   long long now);
> static void reconfigure_datapath(struct dp_netdev *dp)
> OVS_REQUIRES(dp->port_mutex);
> static bool dp_netdev_pmd_try_ref(struct dp_netdev_pmd_thread *pmd);
>@@ -2809,6 +2814,7 @@ dpif_netdev_execute(struct dpif *dpif, struct
>dpif_execute *execute)
> struct dp_netdev *dp = get_dp_netdev(dpif);
> struct dp_netdev_pmd_thread *pmd;
> struct dp_packet_batch pp;
>+long long now = time_msec();

[BHANU] Calling time_msec() can be moved little down in this function, may be 
after the 'probe' check.

>
> if (dp_packet_size(execute->packet) < ETH_HEADER_LEN ||
> dp_packet_size(execute->packet) > UINT16_MAX) { @@ -2851,8 +2857,8
>@@ dpif_netdev_execute(struct dpif *dpif, struct dpif_execute *execute)
>
> dp_packet_batch_init_packet(&pp, execute->packet);
> dp_netdev_execute_actions(pmd, &pp, false, execute->flow,
>-  execute->actions, execute->actions_len,
>-  time_msec());
>+  execute->actions, execute->actions_len, now);
>+dp_netdev_pmd_flush_output_packets(pmd, now);

[BHANU] Is this code path mostly run in non-pmd thread context? I can only 
think of bfd case where the 
where all the above runs in monitoring thread(non-pmd) context.  

>
> if (pmd->core_id == NON_PMD_CORE_ID) {
> ovs_mutex_unlock(&dp->non_pmd_mutex);
>@@ -3101,6 +3107,37 @@ cycles_count_intermediate(struct
>dp_netdev_pmd_thread *pmd,
> non_atomic_ullong_add(&pmd->cycles.n[type], interval);  }
>
>+static void
>+dp_netdev_pmd_flush_output_on_port(struct dp_netdev_pmd_thread
>*pmd,
>+   struct tx_port *p, long long now) {
>+int tx_qid;
>+bool dynamic_txqs;
>+
>+dynamic_txqs = p->port->dynamic_txqs;
>+if (dynamic_txqs) {
>+tx_qid = dpif_netdev_xps_get_tx_qid(pmd, p, now);
>+} else {
>+tx_qid = pmd->static_tx_qid;
>+}
>+
>+netdev_send(p->port->netdev, tx_qid, &p->output_pkts, true,
>dynamic_txqs);
>+dp_packet_batch_init(&p->output_pkts);
>+}
>+
>+static void
>+dp_netdev_pmd_flush_output_packets(struct dp_netdev_pmd_thread
>*pmd,
>+   long long now) {
>+str

Re: [ovs-dev] [PATCH V2 4/4] netdev-tc-offloads: Add support for action set

2017-09-25 Thread Paul Blakey



On 18/09/2017 18:05, Simon Horman wrote:

On Mon, Sep 18, 2017 at 07:16:04AM +0300, Roi Dayan wrote:

From: Paul Blakey 

Implement support for offloading ovs action set using
tc header rewrite action.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---
  lib/netdev-tc-offloads.c | 201 +--
  1 file changed, 195 insertions(+), 6 deletions(-)

diff --git a/lib/netdev-tc-offloads.c b/lib/netdev-tc-offloads.c
index 3c145c2..4044a77 100644
--- a/lib/netdev-tc-offloads.c
+++ b/lib/netdev-tc-offloads.c
@@ -27,11 +27,13 @@
  #include "openvswitch/ofpbuf.h"
  #include "openvswitch/thread.h"
  #include "openvswitch/types.h"
+#include "openvswitch/util.h"
  #include "openvswitch/vlog.h"
  #include "netdev-linux.h"
  #include "netlink.h"
  #include "netlink-socket.h"
  #include "odp-netlink.h"
+#include "odp-util.h"
  #include "tc.h"
  #include "unaligned.h"
  #include "util.h"
@@ -41,6 +43,76 @@ VLOG_DEFINE_THIS_MODULE(netdev_tc_offloads);
  static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);
  
  static struct hmap ufid_tc = HMAP_INITIALIZER(&ufid_tc);

+
+struct netlink_field {
+int offset;
+int flower_offset;
+int size;
+};
+
+static struct netlink_field set_flower_map[][3] = {
+[OVS_KEY_ATTR_IPV4] = {
+{ offsetof(struct ovs_key_ipv4, ipv4_src),
+  offsetof(struct tc_flower_key, ipv4.ipv4_src),
+  MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
+},
+{ offsetof(struct ovs_key_ipv4, ipv4_dst),
+  offsetof(struct tc_flower_key, ipv4.ipv4_dst),
+  MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
+},
+{ offsetof(struct ovs_key_ipv4, ipv4_ttl),
+  offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
+  MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
+},
+},
+[OVS_KEY_ATTR_IPV6] = {
+{ offsetof(struct ovs_key_ipv6, ipv6_src),
+  offsetof(struct tc_flower_key, ipv6.ipv6_src),
+  MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
+},
+{ offsetof(struct ovs_key_ipv6, ipv6_dst),
+  offsetof(struct tc_flower_key, ipv6.ipv6_dst),
+  MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
+},
+},
+[OVS_KEY_ATTR_ETHERNET] = {
+{ offsetof(struct ovs_key_ethernet, eth_src),
+  offsetof(struct tc_flower_key, src_mac),
+  MEMBER_SIZEOF(struct tc_flower_key, src_mac)
+},
+{ offsetof(struct ovs_key_ethernet, eth_dst),
+  offsetof(struct tc_flower_key, dst_mac),
+  MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
+},
+},
+[OVS_KEY_ATTR_ETHERTYPE] = {
+{ 0,
+  offsetof(struct tc_flower_key, eth_type),
+  MEMBER_SIZEOF(struct tc_flower_key, eth_type)
+},
+},
+[OVS_KEY_ATTR_TCP] = {
+{ offsetof(struct ovs_key_tcp, tcp_src),
+  offsetof(struct tc_flower_key, tcp_src),
+  MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
+},
+{ offsetof(struct ovs_key_tcp, tcp_dst),
+  offsetof(struct tc_flower_key, tcp_dst),
+  MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
+},
+},
+[OVS_KEY_ATTR_UDP] = {
+{ offsetof(struct ovs_key_udp, udp_src),
+  offsetof(struct tc_flower_key, udp_src),
+  MEMBER_SIZEOF(struct tc_flower_key, udp_src)
+},
+{ offsetof(struct ovs_key_udp, udp_dst),
+  offsetof(struct tc_flower_key, udp_dst),
+  MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
+},
+},
+};


Do you have any plans to add the following?

OVS_KEY_ATTR_ICMP
OVS_KEY_ATTR_ICMPV6
OVS_KEY_ATTR_ARP
OVS_KEY_ATTR_ND
OVS_KEY_ATTR_SCTP
OVS_KEY_ATTR_TCP_FLAGS

...




Yes, if pedit supports them (which I think it should). do you want it 
me to add it to this patchset, for us it would be faster to add those 
later after this is accepted as we have some inside testsuite for the 
currently supported ones and will need to come up with new ones for these.




___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 2/3] ovs-save: Use a file to restore flows instead of heredoc

2017-09-25 Thread Timothy Redaelli
This patch makes ovs-save to use a file to restore flows instead of using
shell script here-document.
This is needed since eval + here-documents are much slower than reading a file
with the rules directly.

Signed-off-by: Timothy Redaelli 
---
 utilities/ovs-save | 13 +++--
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/utilities/ovs-save b/utilities/ovs-save
index fc9418c3d..da65c41ec 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -110,6 +110,7 @@ save_flows () {
 exit 1
 fi
 
+workdir=$(mktemp -d "${TMPDIR:-/tmp}/ovs-save.XX")
 for bridge in "$@"; do
 # Get the highest enabled OpenFlow version
 ofp_version=$(get_highest_ofp_version "$bridge")
@@ -120,19 +121,19 @@ save_flows () {
  cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
 echo "'"
 
-echo -n "ovs-ofctl -O $ofp_version add-flows ${bridge} "
+echo -n "ovs-ofctl -O $ofp_version add-flows ${bridge} " \
+"\"$workdir/$bridge.flows.dump\""
 
 # If possible, use OpenFlow 1.4 atomic bundle transaction to add flows
-[ ${ofp_version#OpenFlow} -ge 14 ] && echo -n "--bundle "
-
-echo  "- << EOF"
+[ ${ofp_version#OpenFlow} -ge 14 ] && echo " --bundle" || echo
 
 ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | 
\
 sed -e '/NXST_FLOW/d' \
 -e '/OFPST_FLOW/d' \
--e 's/\(idle\|hard\)_age=[^,]*,//g'
-echo "EOF"
+-e 's/\(idle\|hard\)_age=[^,]*,//g' > \
+"$workdir/$bridge.flows.dump"
 done
+echo "rm -rf \"$workdir\""
 }
 
 while [ $# -ne 0 ]
-- 
2.13.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 3/3] ovs-save: Replace "echo -n" with "printf"

2017-09-25 Thread Timothy Redaelli
This is neeed since "echo -n" is not POSIX and may not work with some shells.

Signed-off-by: Timothy Redaelli 
---
 utilities/ovs-save | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/utilities/ovs-save b/utilities/ovs-save
index da65c41ec..ea8fb6a45 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -115,13 +115,13 @@ save_flows () {
 # Get the highest enabled OpenFlow version
 ofp_version=$(get_highest_ofp_version "$bridge")
 
-echo -n "ovs-ofctl add-tlv-map ${bridge} '"
+printf "%s" "ovs-ofctl add-tlv-map ${bridge} '"
 ovs-ofctl dump-tlv-map ${bridge} | \
 awk '/^ 0x/ {if (cnt != 0) printf ","; \
  cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
 echo "'"
 
-echo -n "ovs-ofctl -O $ofp_version add-flows ${bridge} " \
+printf "%s" "ovs-ofctl -O $ofp_version add-flows ${bridge} " \
 "\"$workdir/$bridge.flows.dump\""
 
 # If possible, use OpenFlow 1.4 atomic bundle transaction to add flows
-- 
2.13.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 1/3] ovs-save: Use --bundle to restore flows (on OpenFlow 1.4+)

2017-09-25 Thread Timothy Redaelli
If possible, use OpenFlow 1.4 atomic bundle transaction to restore flows.
The patch uses also the highest enabled OpenFlow version to do the queries.

With the actual implementation, if you have the default OpenFlow version
disabled then ovs-save fails. This patch also fixes that problem.

Signed-off-by: Timothy Redaelli 
---
 utilities/ovs-save | 22 +++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/utilities/ovs-save b/utilities/ovs-save
index 8b8dbf421..fc9418c3d 100755
--- a/utilities/ovs-save
+++ b/utilities/ovs-save
@@ -99,6 +99,11 @@ save_interfaces () {
 fi
 }
 
+get_highest_ofp_version() {
+ovs-vsctl get bridge "$1" protocols | \
+awk -F '"' '{ print (NF>1)? $(NF-1) : "OpenFlow14" }'
+}
+
 save_flows () {
 if (ovs-ofctl --version) > /dev/null 2>&1; then :; else
 echo "$0: ovs-ofctl not found in $PATH" >&2
@@ -106,15 +111,26 @@ save_flows () {
 fi
 
 for bridge in "$@"; do
+# Get the highest enabled OpenFlow version
+ofp_version=$(get_highest_ofp_version "$bridge")
+
 echo -n "ovs-ofctl add-tlv-map ${bridge} '"
 ovs-ofctl dump-tlv-map ${bridge} | \
 awk '/^ 0x/ {if (cnt != 0) printf ","; \
  cnt++;printf "{class="$1",type="$2",len="$3"}->"$4}'
 echo "'"
 
-echo "ovs-ofctl add-flows ${bridge} - << EOF"
-ovs-ofctl dump-flows "${bridge}" | sed -e '/NXST_FLOW/d' \
--e 's/\(idle\|hard\)_age=[^,]*,//g'
+echo -n "ovs-ofctl -O $ofp_version add-flows ${bridge} "
+
+# If possible, use OpenFlow 1.4 atomic bundle transaction to add flows
+[ ${ofp_version#OpenFlow} -ge 14 ] && echo -n "--bundle "
+
+echo  "- << EOF"
+
+ovs-ofctl -O $ofp_version dump-flows --no-names --no-stats "$bridge" | 
\
+sed -e '/NXST_FLOW/d' \
+-e '/OFPST_FLOW/d' \
+-e 's/\(idle\|hard\)_age=[^,]*,//g'
 echo "EOF"
 done
 }
-- 
2.13.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2 0/3] ovs-save: Some refactors to speed-up save-flows

2017-09-25 Thread Timothy Redaelli
v1->v2: Splitted up to multiple patches and fixed workdir usage

Was: https://patchwork.ozlabs.org/patch/811532/

Timothy Redaelli (3):
  ovs-save: Use --bundle to restore flows (on OpenFlow 1.4+)
  ovs-save: Use a file to restore flows instead of heredoc
  ovs-save: Replace "echo -n" with "printf"

 utilities/ovs-save | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

-- 
2.13.5

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v9] openvswitch: enable NSH support

2017-09-25 Thread Yi Yang
v8->v9
 - Fix build error reported by daily intel build
   because nsh module isn't selected by openvswitch

v7->v8
 - Rework nested value and mask for OVS_KEY_ATTR_NSH
 - Change pop_nsh to adapt to nsh kernel module
 - Fix many issues per comments from Jiri Benc

v6->v7
 - Remove NSH GSO patches in v6 because Jiri Benc
   reworked it as another patch series and they have
   been merged.
 - Change it to adapt to nsh kernel module added by NSH
   GSO patch series

v5->v6
 - Fix the rest comments for v4.
 - Add NSH GSO support for VxLAN-gpe + NSH and
   Eth + NSH.

v4->v5
 - Fix many comments by Jiri Benc and Eric Garver
   for v4.

v3->v4
 - Add new NSH match field ttl
 - Update NSH header to the latest format
   which will be final format and won't change
   per its author's confirmation.
 - Fix comments for v3.

v2->v3
 - Change OVS_KEY_ATTR_NSH to nested key to handle
   length-fixed attributes and length-variable
   attriubte more flexibly.
 - Remove struct ovs_action_push_nsh completely
 - Add code to handle nested attribute for SET_MASKED
 - Change PUSH_NSH to use the nested OVS_KEY_ATTR_NSH
   to transfer NSH header data.
 - Fix comments and coding style issues by Jiri and Eric

v1->v2
 - Change encap_nsh and decap_nsh to push_nsh and pop_nsh
 - Dynamically allocate struct ovs_action_push_nsh for
   length-variable metadata.

OVS master and 2.8 branch has merged NSH userspace
patch series, this patch is to enable NSH support
in kernel data path in order that OVS can support
NSH in compat mode by porting this.

Signed-off-by: Yi Yang 
---
 include/net/nsh.h|   3 +
 include/uapi/linux/openvswitch.h |  29 
 net/nsh/nsh.c|  53 +++
 net/openvswitch/Kconfig  |   1 +
 net/openvswitch/actions.c| 112 ++
 net/openvswitch/flow.c   |  51 ++
 net/openvswitch/flow.h   |  11 ++
 net/openvswitch/flow_netlink.c   | 324 ++-
 net/openvswitch/flow_netlink.h   |   5 +
 9 files changed, 588 insertions(+), 1 deletion(-)

diff --git a/include/net/nsh.h b/include/net/nsh.h
index a1eaea2..b886d33 100644
--- a/include/net/nsh.h
+++ b/include/net/nsh.h
@@ -304,4 +304,7 @@ static inline void nsh_set_flags_ttl_len(struct nshhdr 
*nsh, u8 flags,
NSH_FLAGS_MASK | NSH_TTL_MASK | NSH_LEN_MASK);
 }
 
+int skb_push_nsh(struct sk_buff *skb, const struct nshhdr *src_nsh_hdr);
+int skb_pop_nsh(struct sk_buff *skb);
+
 #endif /* __NET_NSH_H */
diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h
index 156ee4c..c1a785c 100644
--- a/include/uapi/linux/openvswitch.h
+++ b/include/uapi/linux/openvswitch.h
@@ -333,6 +333,7 @@ enum ovs_key_attr {
OVS_KEY_ATTR_CT_LABELS, /* 16-octet connection tracking label */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4,   /* struct ovs_key_ct_tuple_ipv4 */
OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6,   /* struct ovs_key_ct_tuple_ipv6 */
+   OVS_KEY_ATTR_NSH,   /* Nested set of ovs_nsh_key_* */
 
 #ifdef __KERNEL__
OVS_KEY_ATTR_TUNNEL_INFO,  /* struct ip_tunnel_info */
@@ -491,6 +492,30 @@ struct ovs_key_ct_tuple_ipv6 {
__u8   ipv6_proto;
 };
 
+enum ovs_nsh_key_attr {
+   OVS_NSH_KEY_ATTR_UNSPEC,
+   OVS_NSH_KEY_ATTR_BASE,  /* struct ovs_nsh_key_base. */
+   OVS_NSH_KEY_ATTR_MD1,   /* struct ovs_nsh_key_md1. */
+   OVS_NSH_KEY_ATTR_MD2,   /* variable-length octets for MD type 2. */
+   __OVS_NSH_KEY_ATTR_MAX
+};
+
+#define OVS_NSH_KEY_ATTR_MAX (__OVS_NSH_KEY_ATTR_MAX - 1)
+
+struct ovs_nsh_key_base {
+   __u8 flags;
+   __u8 ttl;
+   __u8 mdtype;
+   __u8 np;
+   __be32 path_hdr;
+};
+
+#define NSH_MD1_CONTEXT_SIZE 4
+
+struct ovs_nsh_key_md1 {
+   __be32 context[NSH_MD1_CONTEXT_SIZE];
+};
+
 /**
  * enum ovs_flow_attr - attributes for %OVS_FLOW_* commands.
  * @OVS_FLOW_ATTR_KEY: Nested %OVS_KEY_ATTR_* attributes specifying the flow
@@ -806,6 +831,8 @@ struct ovs_action_push_eth {
  * packet.
  * @OVS_ACTION_ATTR_POP_ETH: Pop the outermost Ethernet header off the
  * packet.
+ * @OVS_ACTION_ATTR_PUSH_NSH: push NSH header to the packet.
+ * @OVS_ACTION_ATTR_POP_NSH: pop the outermost NSH header off the packet.
  *
  * Only a single header can be set with a single %OVS_ACTION_ATTR_SET.  Not all
  * fields within a header are modifiable, e.g. the IPv4 protocol and fragment
@@ -835,6 +862,8 @@ enum ovs_action_attr {
OVS_ACTION_ATTR_TRUNC,/* u32 struct ovs_action_trunc. */
OVS_ACTION_ATTR_PUSH_ETH, /* struct ovs_action_push_eth. */
OVS_ACTION_ATTR_POP_ETH,  /* No argument. */
+   OVS_ACTION_ATTR_PUSH_NSH, /* Nested OVS_NSH_KEY_ATTR_*. */
+   OVS_ACTION_ATTR_POP_NSH,  /* No argument. */
 
__OVS_ACTION_ATTR_MAX,/* Nothing past this will be accepted
   * from userspace. */
diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c
index 58fb827..54334ca 100644
--- a/net/nsh/nsh.c
+++ 

Re: [ovs-dev] [PATCH] ovn-controller: pending_ct_zones should be destroyed

2017-09-25 Thread Miguel Angel Ajo Pelayo
ooh thanks ':D

On Mon, Sep 25, 2017 at 3:00 PM, Russell Bryant  wrote:

> On Mon, Sep 25, 2017 at 5:01 AM, Miguel Angel Ajo Pelayo
>  wrote:
> > Acked-by: Miguel Angel Ajo 
> >
> > (Somehow I thought I had acked this, but apparently I only checked it was
> > ok and never responded)
>
> You had acked a previous version of the patch.
>
> I applied this to master and also updated the AUTHORS file.  Thanks!
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH V2 3/4] tc: Add header rewrite using tc pedit action

2017-09-25 Thread Paul Blakey



On 18/09/2017 18:01, Simon Horman wrote:

On Mon, Sep 18, 2017 at 07:16:03AM +0300, Roi Dayan wrote:

From: Paul Blakey 

To be later used to implement ovs action set offloading.

Signed-off-by: Paul Blakey 
Reviewed-by: Roi Dayan 
---
  lib/tc.c | 372 ++-
  lib/tc.h |  16 +++
  2 files changed, 385 insertions(+), 3 deletions(-)

diff --git a/lib/tc.c b/lib/tc.c
index c9cada2..743b2ee 100644
--- a/lib/tc.c
+++ b/lib/tc.c
@@ -21,8 +21,10 @@
  #include 
  #include 
  #include 
+#include 
  #include 
  #include 
+#include 
  #include 
  #include 
  #include 
@@ -33,11 +35,14 @@
  #include "netlink-socket.h"
  #include "netlink.h"
  #include "openvswitch/ofpbuf.h"
+#include "openvswitch/util.h"
  #include "openvswitch/vlog.h"
  #include "packets.h"
  #include "timeval.h"
  #include "unaligned.h"
  
+#define MAX_PEDIT_OFFSETS 8


Why 8?
We don't expect anything more right now (ipv6 src/dst rewrite requires 8 
pedits iirc). I can't think of a larger use case, maybe ipv6 + macs if 
that's makes sens. do you suggest we increase it? to what?





+
  VLOG_DEFINE_THIS_MODULE(tc);
  
  static struct vlog_rate_limit error_rl = VLOG_RATE_LIMIT_INIT(60, 5);

@@ -50,6 +55,82 @@ enum tc_offload_policy {
  
  static enum tc_offload_policy tc_policy = TC_POLICY_NONE;
  
+struct tc_pedit_key_ex {

+enum pedit_header_type htype;
+enum pedit_cmd cmd;
+};
+
+struct flower_key_to_pedit {
+enum pedit_header_type htype;
+int flower_offset;
+int offset;
+int size;
+};
+
+static struct flower_key_to_pedit flower_pedit_map[] = {
+{
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+12,
+offsetof(struct tc_flower_key, ipv4.ipv4_src),
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_src)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+16,
+offsetof(struct tc_flower_key, ipv4.ipv4_dst),
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.ipv4_dst)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP4,
+8,
+offsetof(struct tc_flower_key, ipv4.rewrite_ttl),
+MEMBER_SIZEOF(struct tc_flower_key, ipv4.rewrite_ttl)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
+8,
+offsetof(struct tc_flower_key, ipv6.ipv6_src),
+MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_src)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_IP6,
+24,
+offsetof(struct tc_flower_key, ipv6.ipv6_dst),
+MEMBER_SIZEOF(struct tc_flower_key, ipv6.ipv6_dst)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
+6,
+offsetof(struct tc_flower_key, src_mac),
+MEMBER_SIZEOF(struct tc_flower_key, src_mac)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
+0,
+offsetof(struct tc_flower_key, dst_mac),
+MEMBER_SIZEOF(struct tc_flower_key, dst_mac)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_ETH,
+12,
+offsetof(struct tc_flower_key, eth_type),
+MEMBER_SIZEOF(struct tc_flower_key, eth_type)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
+0,
+offsetof(struct tc_flower_key, tcp_src),
+MEMBER_SIZEOF(struct tc_flower_key, tcp_src)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_TCP,
+2,
+offsetof(struct tc_flower_key, tcp_dst),
+MEMBER_SIZEOF(struct tc_flower_key, tcp_dst)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
+0,
+offsetof(struct tc_flower_key, udp_src),
+MEMBER_SIZEOF(struct tc_flower_key, udp_src)
+}, {
+TCA_PEDIT_KEY_EX_HDR_TYPE_UDP,
+2,
+offsetof(struct tc_flower_key, udp_dst),
+MEMBER_SIZEOF(struct tc_flower_key, udp_dst)
+},
+};
+
  struct tcmsg *
  tc_make_request(int ifindex, int type, unsigned int flags,
  struct ofpbuf *request)
@@ -365,6 +446,96 @@ nl_parse_flower_ip(struct nlattr **attrs, struct tc_flower 
*flower) {
  }
  }
  
+static const struct nl_policy pedit_policy[] = {

+[TCA_PEDIT_PARMS_EX] = { .type = NL_A_UNSPEC,
+ .min_len = sizeof(struct tc_pedit),
+ .optional = false, },
+[TCA_PEDIT_KEYS_EX]   = { .type = NL_A_NESTED,
+  .optional = false, },
+};
+
+static int
+nl_parse_act_pedit(struct nlattr *options, struct tc_flower *flower)
+{
+struct nlattr *pe_attrs[ARRAY_SIZE(pedit_policy)];
+const struct tc_pedit *pe;
+const struct tc_pedit_key *keys;
+const struct nlattr *nla, *keys_ex, *ex_type;
+const void *keys_attr;
+char *rewrite_key = (void *) &flower->rewrite.key;
+char *rewrite_mask = (void *) &flower->rewrite.mask;
+size_t keys_ex_size, left;
+int type, i = 0;
+
+if (!nl_parse_nested(options, pedit_policy, pe_attrs,
+ ARRAY_SIZE(pedit_policy))) {
+VLOG_ERR_RL(&error_rl, "failed to parse pedit action options");
+return EPROTO;
+}
+
+pe = nl_attr_get_unspec(pe_

Re: [ovs-dev] [PATCH] ovn-controller: pending_ct_zones should be destroyed

2017-09-25 Thread Russell Bryant
On Mon, Sep 25, 2017 at 5:01 AM, Miguel Angel Ajo Pelayo
 wrote:
> Acked-by: Miguel Angel Ajo 
>
> (Somehow I thought I had acked this, but apparently I only checked it was
> ok and never responded)

You had acked a previous version of the patch.

I applied this to master and also updated the AUTHORS file.  Thanks!
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] 2.7.3 release?

2017-09-25 Thread Russell Bryant
There's some important changes in branch-2.7.  Can we do a 2.7.3
release soon?  Does anyone have anything they need addressed first?

Thanks,

-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH RFC 39/52] ovn-sbctl: Allow retries by default.

2017-09-25 Thread Ben Pfaff
OVS has that already too, but the retry time caps out at 8 seconds
currently.  It goes from 1 to 2 to 4 to 8 seconds.  The maximum could
easily be raised.  I don't know a good way to decide what the maximum
should be.

Possibly, it should be a random time within a range, to avoid
synchronizing clients' retries when a server goes down.  That is not yet
implemented (I just thought of it now).

On Mon, Sep 25, 2017 at 10:53:09AM +0200, Miguel Angel Ajo Pelayo wrote:
> It makes sense.
> 
> As an idea for future improvements (also applicable to the python client
> and the ovsdbapp): something that I've found valuable in distributed
> systems is the ability to set an exponential retry time (with a ceiling)
> because it's a good failsafe mechanism when one of the clients sends some
> sort of big/buggy request that it's going to trigger a failure/long
> execution time on the server and finally timeout.
> 
> The exponential backoff makes it more likely than the server will recover,
> or have time to serve other clients.
> 
> On Fri, Sep 22, 2017 at 9:48 PM, Ben Pfaff  wrote:
> 
> > Thank you for the review.
> >
> > There isn't currently a way to control the number of retries.  If that
> > is a valuable feature, then it could be added.
> >
> > You can control the timeout for these utilities with the --timeout
> > option (or use the "timeout" program from GNU coreutils).
> >
> > On Thu, Sep 21, 2017 at 01:17:09PM +0200, Miguel Angel Ajo Pelayo wrote:
> > > Makes sense. Is there any way to control the number of retries, and the
> > > retry time?
> > >
> > > Acked-by: Miguel Angel Ajo 
> > >
> > > On Wed, Sep 20, 2017 at 12:01 AM, Ben Pfaff  wrote:
> > >
> > > > Most of the OVS database-manipulation utilities (ovn-sbctl, ovn-nbctl,
> > > > ovs-vsctl, vtep-ctl) don't retry their connections by default because
> > > > they assume that the database is either up or down and likely to stay
> > > > that way.  The OVN southbound database, however, is a likely candidate
> > > > for high availability clustering, so that even if it appears to be
> > > > down for a moment it will be available again soon.  So, prepare for
> > > > the clustering implementation by enabling retry by default in
> > > > ovn-sbctl.
> > > >
> > > > Signed-off-by: Ben Pfaff 
> > > > ---
> > > >  ovn/utilities/ovn-sbctl.c | 2 +-
> > > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > >
> > > > diff --git a/ovn/utilities/ovn-sbctl.c b/ovn/utilities/ovn-sbctl.c
> > > > index 7af5863b08fc..f4452fa6ee85 100644
> > > > --- a/ovn/utilities/ovn-sbctl.c
> > > > +++ b/ovn/utilities/ovn-sbctl.c
> > > > @@ -120,7 +120,7 @@ main(int argc, char *argv[])
> > > >  }
> > > >
> > > >  /* Initialize IDL. */
> > > > -idl = the_idl = ovsdb_idl_create(db, &sbrec_idl_class, false,
> > false);
> > > > +idl = the_idl = ovsdb_idl_create(db, &sbrec_idl_class, false,
> > true);
> > > >  run_prerequisites(commands, n_commands, idl);
> > > >
> > > >  /* Execute the commands.
> > > > --
> > > > 2.10.2
> > > >
> > > > ___
> > > > dev mailing list
> > > > d...@openvswitch.org
> > > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > > >
> >
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC 0/2] EMC load-shedding

2017-09-25 Thread O Mahony, Billy
Hi Darrell,

Some more information below. I'll hold off on a v2 for now to give others time 
to comment.

Thanks,
Billy. 

 
> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Friday, September 22, 2017 7:20 PM
> To: O Mahony, Billy ; d...@openvswitch.org
> Cc: i.maxim...@samsung.com; jan.scheur...@ericsson.com
> Subject: Re: [RFC 0/2] EMC load-shedding
> 
> Thanks for working on this Billy
> One comment inline.
> 
> On 9/22/17, 6:47 AM, "Billy O'Mahony"  wrote:
> 
> Hi All,
> 
> Please find attached RFC patch for EMC load-shedding [1] as promised [2].
> 
> This applies clean on 5ff834 "Increment ct packet counters..." It also 
> uses
> Ilya's patch "Fix per packet cycles statistics." [3] so I've included 
> that in
> the patch set as it wasn't merged when I started the RFC.
> 
> The main goal for this RFC is only to demonstrate the outline of the
> mechanism
> and get feedback & advice for further work.
> 
> However I did some initial testing with promising results. For 8K to 64K
> flows
> the cycles per packet drop from ~1200 to ~1100. For small numbers of flows
> (~16) the cycles per packet remain at ~900 which I beleive means no
> increase
> but I didn't baseline that situation.
> 
> There are some TODOs commented in the patch with XXX.
> 
> For one I think the mechanism should take into account the expected
> cycle-cost
> of EMC lookup and EMC miss (dpcls lookup) when deciding how much load
> to shed.
> Rather than the heuristic in this patch which is to keep the emc hit rate 
> (for
> flow which have not been diverted from the EMC) between certain
> bounds.
> 
> 
> [Darrell]
> Could you expand on the description of the algorithm and the rational?
> I know the algorithm was discussed along with other proposed patches, but I
> think it be would be beneficial if the patch (boils down to a single patch)
> described it.
[[BO'M]] 

I'll add that description and some comments to the v2 of the patch.  In the 
meantime reviewers should find this helpful:

As the number of flows increased there will eventually be too many flows 
contending for a place in the EMC cache. The EMC cache becomes a liability when 
emc_lookup_cost + (emc_miss_rate * dpcls_lookup_cost) grows to be greater
than a straightforward dpcls_lookup_cost. When this occurs if some proportion 
of flows could be made to skip the EMC  (ie neither be inserted into nor looked 
up in the EMC) it would result in lower lookup costs overall. 

This requires and efficient and flexible way to categorize flows into - 'skip 
EMC' and 'use EMC' categories. The RSS hash can fulfil this role by setting a 
threshold whereby RSS hashes under a certain value are skipped from the EMC.

The algorithm in this RFC is based on setting this shed threshold so that the 
hit rate on the EMC remains between 50 and 70% which from observation gives an 
efficient use of the EMC (based on cycles per packet). Periodically (after each 
3 million packets) the EMC hit rate is checked and if it is over 70% then the 
shed threshold is increased (more flows are shed from the EMC) and if it is 
below 50% the shed threshold in decreased (fewer flows are shed from the EMC). 
The shed threshold as 16 different values (0x_ to 0xF000_) which 
allows for no-shedding, 1/16th, 2/16ths, ... 15/16ths of flows to skipped from 
the EMC.

Each time the shed_threshold is adjusted it is moved by just one step.

Later revisions will look at the actual lookup cost for flows in the EMC and 
dpcls rather than using hard-coded hit rates to define efficient use of the 
EMC. They may also adjust the shed rate in a proportional manner and adjust on 
a timed interval instead of every N packets.


> 
> Probably the code could benefit from some expanded comments as well?
> 
> I see one comment in the code
> +/* As hit rate goes down shed thresh goes up (more is shed from EMC)
> */
> +/* XXX consider increment more if further out of bounds *
> 
[[BO'M]] 
> 
> Also we should decide on at least one flow distribution that would be
> useful
> (i.e. realistic) for EMC testing. The tests above have either been 
> carried out
> with a random (uniform) flow distribution which doesn't play well with 
> flow
> caching or else a round-robin flow distribution which is actually 
> adverserial
> to flow caching. If I have an agreed flow distribution I can then figure 
> out
> how to produce it for testing :).
> 
> [1] https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_pipermail_ovs-2Ddev_2017-
> 2DAugust_336509.html&d=DwIBAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA
> 09CGX7JQ5Ih-
> uZnsw&m=xyXl4Y7PjflJKH8RTjHev3uMh9JgQGiFk0aPezyN3Qc&s=dr9kNy7p3
> flCYC1W1AO02TRCvTHiazrQ9A5-Ssi4A7o&e=
> [2] https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_pipermail_ovs-2Ddev_2017-
> 2DSeptember_338380.html&d=DwIBAg&c=uilaK90D4TOVoH58JNXRgQ&r=B

[ovs-dev] Proposal for enabling dp_hash irrespective of OF version

2017-09-25 Thread Nitin Katiyar
Hi,
I have a proposal to add a provision for using "dp_hash" as selection method 
(which can currently be used with OF1.5 based controllers only) irrespective of 
the OF version being used by controller.

The link for document is : 
https://docs.google.com/document/d/13Jiwbs0atV_Y8Vatj6SmQeB_qdM-AyHu8uhtsO6P-_8/edit?usp=sharing

Let me know your comments.

Regards,
Nitin

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 2/2] dpctl: init CT entry variable.

2017-09-25 Thread Fischetti, Antonio
> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Friday, September 22, 2017 9:26 AM
> To: Fischetti, Antonio ; d...@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH 2/2] dpctl: init CT entry variable.
> 
> 
> 
> On 9/13/17, 5:37 AM, "ovs-dev-boun...@openvswitch.org on behalf of
> antonio.fische...@intel.com"  antonio.fische...@intel.com> wrote:
> 
> ct_dpif_entry_uninit could potentially be called even if
> ct_dpif_dump_next failed. As ct_dpif_entry_uninit receives
> a pointer to a CT entry - and just checks it is not null -
> it's safer to init to zero any instantiated ct_dpif_entry
> variable before its usage.
> 
> [Darrell] I took a look and did not see a particular problem.
>Was there an issue that we are trying to address?; if so, this
> may hide it ?

[Antonio]
This change is more a matter of keeping safe habits for future
code additions. 
In a new CT function that could be added down the line, one could
potentially call ct_dpif_entry_uninit without checking what was
returned by ct_dpif_dump_next.

As this is not in the hotpath, I added a memset to be extra-careful 
when initializing the local CT entry variable.

Maybe also a comment on top of the fn definition could help on this,
something like?

+/* This function must be called when the returned
+   value from ct_dpif_dump_next is 0. */
void
ct_dpif_entry_uninit(struct ct_dpif_entry *entry)
{
if (entry) {
if (entry->helper.name) {



> 
> 
> Signed-off-by: Antonio Fischetti 
> ---
>  lib/dpctl.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 86d0f90..77d4e58 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -1287,6 +1287,7 @@ dpctl_dump_conntrack(int argc, const char *argv[],
>  return error;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  struct ds s = DS_EMPTY_INITIALIZER;
> 
> @@ -1392,6 +1393,7 @@ dpctl_ct_stats_show(int argc, const char *argv[],
>  return error;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  int tot_conn = 0;
>  while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  ct_dpif_entry_uninit(&cte);
> @@ -1532,6 +1534,7 @@ dpctl_ct_bkts(int argc, const char *argv[],
>   return 0;
>  }
> 
> +memset(&cte, 0, sizeof(cte));
>  dpctl_print(dpctl_p, "Total Buckets: %d\n", tot_bkts);
> 
>  int tot_conn = 0;
> --
> 2.4.11
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://urldefense.proofpoint.com/v2/url?u=https-
> 3A__mail.openvswitch.org_mailman_listinfo_ovs-
> 2Ddev&d=DwICAg&c=uilaK90D4TOVoH58JNXRgQ&r=BVhFA09CGX7JQ5Ih-
> uZnsw&m=3FF1c4sa7rHZb5a1DAZQlnsPZywcY7R_LNFki9WS9So&s=tU4fSt243XI_2QHkAF4R2h0sm
> vtTC8fDyiOXBI02_t8&e=
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] locks for clustered OVSDB

2017-09-25 Thread Miguel Angel Ajo Pelayo
I believe Lucas Alvares could give you valuable feedback on this as
he was planning to use this as a mechanism for synchronization on
the networking-ovn side (if I didn't get it wrong).

I believe he's back by October.

Best regards.
Miguel Ángel.

On Fri, Sep 22, 2017 at 6:58 PM, Ben Pfaff  wrote:

> We've had a couple of brief discussions during the OVN meeting about
> locks in OVSDB.  As I understand it, a few services use OVSDB locks to
> avoid duplicating work.  The question is whether and how to extend OVSDB
> locks to a distributed context.
>
> First, I think it's worth reviewing how OVSDB locks work, filling in
> some of the implications that aren't covered by RFC 7047.  OVSDB locks
> are server-level (not database-level) objects that can be owned by at
> most one client at a time.  Clients can obtain them either through a
> "lock" operation, in which case they get queued to obtain the lock when
> it's no longer owned by anyone else, or through a "steal" operation that
> always succeeds immediately, kicking out whoever (if anyone) previously
> owned the lock.  A client loses a lock whenever it releases it with an
> "unlock" operation or whenever its connection to the server drops.  The
> server notifies a client whenever it acquires a lock or whenever it is
> stolen by another client.
>
> This scheme works perfectly for one particular scenario: where the
> resource protected by the lock is an OVSDB database (or part of one) on
> the same server as the lock.  This is because OVSDB transactions include
> an "assert" operation that names a lock and aborts the transaction if
> the client does not hold the lock.  Since the server is both the lock
> manager and the implementer of the transaction, it can always make the
> correct decision.  This scenario could be extended to distributed locks
> with the same guarantee.
>
> Another scenario that could work acceptably with distributed OVSDB locks
> is one where the lock guards against duplicated work.  For example,
> suppose a couple of ovn-northd instances both try to grab a lock, with
> only the winner actually running, to avoid having both of them spend a
> lot of CPU time recomputing the southbound flow table.  A distributed
> version of OVSDB locks would probably work fine in practice for this,
> although occasionally due to network propagation delays, "steal"
> operations, or different ideas between client and server of when a
> session has dropped, both ovn-northd might think they have the lock.
> (If, however, they combined this with "assert" when they actually
> committed their changes to the southbound database, then they would
> never actually interfere with each other in database commits.)
>
> A scenario that would not work acceptably with distributed OVSDB locks,
> without a change to the model, is where the lock ensures correctness,
> that is, if two clients both think they have the lock then bad things
> happen.  I believe that this requires clients to understand a concept of
> leases, which OVSDB doesn't currently have.  The "steal" operation is
> also problematic in this model since it would require canceling a
> lease.  (This scenario also does not work acceptably with single-server
> OVSDB locks.)
>
> I'd appreciate anyone's thoughts on the topic.
>
> This webpage is good reading:
> https://martin.kleppmann.com/2016/02/08/how-to-do-distributed-locking.html
>
> Thanks,
>
> Ben.
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] dpctl: manage ret value when dumping CT entries.

2017-09-25 Thread Fischetti, Antonio
Hi Darrell, 
I agree with your suggestion in keeping 'error'
as the only variable to manage return values.

In this case - as I'm assuming we shouldn't return an EOF to the 
caller - I should manage error as below?

if (error == EOF) {
error = 0;  << EOF is not an issue, so return 0 to the caller
} else if (error) {  
dpctl_error(dpctl_p, error, "dumping conntrack entry");
ct_dpif_dump_done(dump);
dpif_close(dpif);
return error;
} 


> -Original Message-
> From: Darrell Ball [mailto:db...@vmware.com]
> Sent: Friday, September 22, 2017 8:42 AM
> To: Fischetti, Antonio ; d...@openvswitch.org
> Subject: Re: [ovs-dev] [PATCH 1/2] dpctl: manage ret value when dumping CT
> entries.
> 
> Few comments Antonio
> 
> Darrell
> 
> On 9/13/17, 5:37 AM, "ovs-dev-boun...@openvswitch.org on behalf of
> antonio.fische...@intel.com"  antonio.fische...@intel.com> wrote:
> 
> Manage error value returned by ct_dpif_dump_next.
> 
> Signed-off-by: Antonio Fischetti 
> ---
>  lib/dpctl.c | 28 +---
>  1 file changed, 25 insertions(+), 3 deletions(-)
> 
> diff --git a/lib/dpctl.c b/lib/dpctl.c
> index 8951d6e..86d0f90 100644
> --- a/lib/dpctl.c
> +++ b/lib/dpctl.c
> @@ -1263,6 +1263,7 @@ dpctl_dump_conntrack(int argc, const char *argv[],
>  struct dpif *dpif;
>  char *name;
>  int error;
> +int ret;
> 
>  if (argc > 1 && ovs_scan(argv[argc - 1], "zone=%"SCNu16, &zone)) {
>  pzone = &zone;
> @@ -1286,7 +1287,7 @@ dpctl_dump_conntrack(int argc, const char *argv[],
>  return error;
>  }
> 
> -while (!ct_dpif_dump_next(dump, &cte)) {
> +while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  struct ds s = DS_EMPTY_INITIALIZER;
> 
>  ct_dpif_format_entry(&cte, &s, dpctl_p->verbosity,
> @@ -1296,6 +1297,13 @@ dpctl_dump_conntrack(int argc, const char *argv[],
>  dpctl_print(dpctl_p, "%s\n", ds_cstr(&s));
>  ds_destroy(&s);
>  }
> +if (ret && ret != EOF) {
> +dpctl_error(dpctl_p, ret, "dumping conntrack entry");
> +ct_dpif_dump_done(dump);
> +dpif_close(dpif);
> +return ret;
> +}
> +
> 
> [Darrell] Maybe we can reuse ‘error’ ?
> if (error && error != EOF) {
>and just do
>dpctl_error(dpctl_p, error, "dumping conntrack entry");
>  and then fall thru for cleanup ?
> 
> 
>  ct_dpif_dump_done(dump);
>  dpif_close(dpif);
>  return error;
> @@ -1348,6 +1356,7 @@ dpctl_ct_stats_show(int argc, const char *argv[],
>  int proto_stats[CT_STATS_MAX];
>  int tcp_conn_per_states[CT_DPIF_TCPS_MAX_NUM];
>  int error;
> +int ret;
> 
>  while (argc > 1 && lastargc != argc) {
>  lastargc = argc;
> @@ -1384,7 +1393,7 @@ dpctl_ct_stats_show(int argc, const char *argv[],
>  }
> 
>  int tot_conn = 0;
> -while (!ct_dpif_dump_next(dump, &cte)) {
> +while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  ct_dpif_entry_uninit(&cte);
>  tot_conn++;
>  switch (cte.tuple_orig.ip_proto) {
> @@ -1425,6 +1434,12 @@ dpctl_ct_stats_show(int argc, const char *argv[],
>  break;
>  }
>  }
> +if (ret && ret != EOF) {
> +dpctl_error(dpctl_p, ret, "dumping conntrack entry");
> +ct_dpif_dump_done(dump);
> +dpif_close(dpif);
> +return ret;
> +}
> 
> [Darrell]
>  Can we reuse ‘error’, just print error and fall thru. ?
> It looks like it is safe to print whatever we know, which could be useful.
> Otherwise, if we have an error in dump_next, we may never be able to see any
> useful info.
> for debugging the same error or something else.
> 
> 
>  dpctl_print(dpctl_p, "Connections Stats:\nTotal: %d\n", 
> tot_conn);
>  if (proto_stats[CT_STATS_TCP]) {
> @@ -1482,6 +1497,7 @@ dpctl_ct_bkts(int argc, const char *argv[],
>  uint16_t *pzone = NULL;
>  int tot_bkts = 0;
>  int error;
> +int ret;
> 
>  if (argc > 1 && !strncmp(argv[argc - 1], CT_BKTS_GT,
> strlen(CT_BKTS_GT))) {
>  if (ovs_scan(argv[argc - 1], CT_BKTS_GT"%"SCNu16, >)) {
> @@ -1521,7 +1537,7 @@ dpctl_ct_bkts(int argc, const char *argv[],
>  int tot_conn = 0;
>  uint32_t *conn_per_bkts = xzalloc(tot_bkts * sizeof(uint32_t));
> 
> -while (!ct_dpif_dump_next(dump, &cte)) {
> +while (!(ret = ct_dpif_dump_next(dump, &cte))) {
>  ct_dpif_entry_uninit(&cte);
>  tot_conn++;
>  if (tot_bkts > 0) {
> @@ -1533,6 +1549,12 @@ dpctl_ct_bkts(int argc, const char *argv[],
>  }
>  }
>  }
> +if (ret && ret != EOF

Re: [ovs-dev] [PATCH] ovn-controller: pending_ct_zones should be destroyed

2017-09-25 Thread Miguel Angel Ajo Pelayo
Acked-by: Miguel Angel Ajo 

(Somehow I thought I had acked this, but apparently I only checked it was
ok and never responded)

On Mon, Sep 25, 2017 at 7:24 AM, 00037997  wrote:

> pending_ct_zones in ovn-controller main should be destroyed when exit.
>
> Signed-off-by: xu rong 
> ---
>  ovn/controller/ovn-controller.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/ovn/controller/ovn-controller.c b/ovn/controller/ovn-
> controller.c
> index a935a79..32cdc9f 100644
> --- a/ovn/controller/ovn-controller.c
> +++ b/ovn/controller/ovn-controller.c
> @@ -843,6 +843,7 @@ main(int argc, char *argv[])
>  pinctrl_destroy();
>
>  simap_destroy(&ct_zones);
> +shash_destroy(&pending_ct_zones);
>
>  bitmap_free(group_table.group_ids);
>  hmap_destroy(&group_table.desired_groups);
> --
> 1.8.3.1
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] 答复: [PATCH v1 1/3] Add multipath static router in OVN northd and north-db

2017-09-25 Thread Miguel Angel Ajo Pelayo
No problem Gao.

Btw, give it a try if you want, and double check that's as we said. I
dint't have time to recheck, and my memory doesn't use to be great ;-)


On Sat, Sep 23, 2017 at 1:54 AM, Gao Zhenyu  wrote:

> Sorry for misleading, in my other testing, I ping the IP(is a router
> port's IP)  and saw packets go into a chassis(router are pined on this
> ovn-chassis by using option:chassis=x).  So  in that time I though the
> router should be  only on a specific ovn-node if route was pinned.
> Miguel had told me that router IP is an special case that will be handled
> by the specific chassis.
>
> Sorry for misleading again. I think we can focus on the implementatioin of
> multipath now. :)
>
> Thanks
> Zhenyu Gao
>
> 2017-09-22 20:22 GMT+08:00 Russell Bryant :
>
>> On Thu, Sep 21, 2017 at 10:56 AM, Miguel Angel Ajo Pelayo
>>  wrote:
>> > On Thu, Sep 21, 2017 at 2:21 PM, Gao Zhenyu 
>> wrote:
>> >
>> >> I think the S/N or E/W are not the matter we should considering now.
>> >>
>> >> The multipath implementation is based on the existing ovn workflows. If
>> >> you can use route to dispatch traffics to different node/logical port,
>> then
>> >> the multipath can make it. Otherwise it must get bug in multipath.
>> >> If the static route cannot dispatch traffic to some nodes or logical
>> port
>> >> then the multipath cannot make it as well.
>> >>
>> >> I am not sure if my understanding is right: I think if you deploy a
>> router
>> >> only on a specific ovn-node, then traffic between
>> A(src)---routerB(dst)
>> >> should go through this router.
>> >>
>> >
>> > That is the point where I'm not sure either. On the tests I made, if you
>> > configured an specific chassis for a router, only the N/S traffic went
>> > through that router (so multipath would make sense there), but in E/W
>> the
>> > traffic going through a router was directly going from origin port
>> chassis,
>> > to destination port chassis, without going through the specific router
>> > chassis.
>> >
>> > May be it is a bug, or may be it was missconfiguration at my side. I
>> will
>> > double check it.
>>
>> I haven't followed all of this discussion in detail, but the behavior
>> you describe is correct and intentional.
>>
>> E/W is distributed.  N/S is centralized.
>>
>> It's actually a little more complicated because you can also define
>> N/S NAT rules that are on other chassis if you have a 1-1 NAT mapping
>> to a logical port on that chassis.
>>
>> >
>> >
>> >>
>> >> Any suggestions and comments are welcome :)
>> >>
>> >>
>> >> Thanks
>> >> Zhenyu Gao
>> >>
>> >> 2017-09-21 19:07 GMT+08:00 Miguel Angel Ajo Pelayo <
>> majop...@redhat.com>:
>> >>
>> >>> May be I missed something, but when I tried setting logical routers
>> into
>> >>> specific chassis, still the E/W traffic was handled in a distributed
>> way
>> >>> (from original chassis to destination chassis without going through
>> the
>> >>> router chassis), such chassis was only used for N/S, but may be I got
>> >>> something wrong.
>> >>>
>> >>>
>> >>> On Wed, Sep 20, 2017 at 4:48 PM, Gao Zhenyu 
>> >>> wrote:
>> >>>
>>  "
>>  But, if an ovn port in foo (chassis A) wants to talk to alice1
>> (chassis
>>  B),
>>  wouldn't all that E/W routing will happen virtually and the end
>> result
>>  is just a tunneled packet between chassis A and chassis B ? "
>>  [ Now the hash function base on dst IP, if foo1 only talks to alice1,
>>  and it is the tunnel packet between chassisA and chassis B ]
>> 
>>  The benifit is if you have two ovn-routers and those router are ONLY
>>  deployed in chassis C and chassis D, the traffics can be sperated in
>> two
>>  paths automatically. Otherwise you need to config static rule one by
>> one to
>>  seperate traffics.
>>  To make a long story short, you also can do same thing by config
>>  numerous static rules to seperate traffic but the multipath can do it
>>  automatically.
>> 
>>  2017-09-20 22:08 GMT+08:00 Miguel Angel Ajo Pelayo <
>> majop...@redhat.com>
>>  :
>> 
>> > I forgot to say thank you very much for the explanation and
>> diagrams.
>> >
>> > On Wed, Sep 20, 2017 at 4:07 PM, Miguel Angel Ajo Pelayo <
>> > majop...@redhat.com> wrote:
>> >
>> >> But, if an ovn port in foo (chassis A) wants to talk to alice1
>> >> (chassis B),
>> >> wouldn't all that E/W routing will happen virtually and the end
>> result
>> >> is just a tunneled packet between chassis A and chassis B ?
>> >>
>> >> What's the benefit of multipath there if the possible failing link
>> is
>> >> always the connection between chassis A and chassis B ?
>> >>
>> >> I suspect there's something I'm missing on the picture.
>> >>
>> >> On Wed, Sep 20, 2017 at 3:49 PM, Gao Zhenyu <
>> sysugaozhe...@gmail.com>
>> >> wrote:
>> >>
>> >>> You can take a look at this patch that implement a testcase :
>> >>> https://patchwork.ozlabs.org/p

Re: [ovs-dev] [PATCH RFC 39/52] ovn-sbctl: Allow retries by default.

2017-09-25 Thread Miguel Angel Ajo Pelayo
It makes sense.

As an idea for future improvements (also applicable to the python client
and the ovsdbapp): something that I've found valuable in distributed
systems is the ability to set an exponential retry time (with a ceiling)
because it's a good failsafe mechanism when one of the clients sends some
sort of big/buggy request that it's going to trigger a failure/long
execution time on the server and finally timeout.

The exponential backoff makes it more likely than the server will recover,
or have time to serve other clients.

On Fri, Sep 22, 2017 at 9:48 PM, Ben Pfaff  wrote:

> Thank you for the review.
>
> There isn't currently a way to control the number of retries.  If that
> is a valuable feature, then it could be added.
>
> You can control the timeout for these utilities with the --timeout
> option (or use the "timeout" program from GNU coreutils).
>
> On Thu, Sep 21, 2017 at 01:17:09PM +0200, Miguel Angel Ajo Pelayo wrote:
> > Makes sense. Is there any way to control the number of retries, and the
> > retry time?
> >
> > Acked-by: Miguel Angel Ajo 
> >
> > On Wed, Sep 20, 2017 at 12:01 AM, Ben Pfaff  wrote:
> >
> > > Most of the OVS database-manipulation utilities (ovn-sbctl, ovn-nbctl,
> > > ovs-vsctl, vtep-ctl) don't retry their connections by default because
> > > they assume that the database is either up or down and likely to stay
> > > that way.  The OVN southbound database, however, is a likely candidate
> > > for high availability clustering, so that even if it appears to be
> > > down for a moment it will be available again soon.  So, prepare for
> > > the clustering implementation by enabling retry by default in
> > > ovn-sbctl.
> > >
> > > Signed-off-by: Ben Pfaff 
> > > ---
> > >  ovn/utilities/ovn-sbctl.c | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > >
> > > diff --git a/ovn/utilities/ovn-sbctl.c b/ovn/utilities/ovn-sbctl.c
> > > index 7af5863b08fc..f4452fa6ee85 100644
> > > --- a/ovn/utilities/ovn-sbctl.c
> > > +++ b/ovn/utilities/ovn-sbctl.c
> > > @@ -120,7 +120,7 @@ main(int argc, char *argv[])
> > >  }
> > >
> > >  /* Initialize IDL. */
> > > -idl = the_idl = ovsdb_idl_create(db, &sbrec_idl_class, false,
> false);
> > > +idl = the_idl = ovsdb_idl_create(db, &sbrec_idl_class, false,
> true);
> > >  run_prerequisites(commands, n_commands, idl);
> > >
> > >  /* Execute the commands.
> > > --
> > > 2.10.2
> > >
> > > ___
> > > dev mailing list
> > > d...@openvswitch.org
> > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> > >
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev