Re: [ovs-dev] [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.

2019-10-29 Thread Sriram Vatala via dev
Hi All,
Please review the update patch.

Changes since v10 :
Corrected the spelling mistake in the commit message.

Thanks & Regards,
Sriram.

-Original Message-
From: Sriram Vatala  
Sent: 29 October 2019 20:20
To: ovs-dev@openvswitch.org; ktray...@redhat.com; i.maxim...@ovn.org
Cc: Ilya Maximets ; Sriram Vatala

Subject: [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH
custom stats.

From: Ilya Maximets 

This is yet another refactoring for upcoming detailed drop stats.
It allows to use single function for all the software calculated statistics
in netdev-dpdk for both vhost and ETH ports.

UINT64_MAX used as a marker for non-supported statistics in a same way as
it's done in bridge.c for common netdev stats.

Co-authored-by: Sriram Vatala 
Cc: Sriram Vatala 
Signed-off-by: Ilya Maximets 
Signed-off-by: Sriram Vatala 
---
 lib/netdev-dpdk.c | 69 +++
 1 file changed, 40 insertions(+), 29 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
04e1a2d1b..2cc2516a9 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -474,6 +474,8 @@ struct netdev_rxq_dpdk {  static void
netdev_dpdk_destruct(struct netdev *netdev);  static void
netdev_dpdk_vhost_destruct(struct netdev *netdev);
 
+static int netdev_dpdk_get_sw_custom_stats(const struct netdev *,
+   struct netdev_custom_stats 
+*);
 static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev);
 
 int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); @@ -1174,7 +1176,7
@@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
 dev->rte_xstats_ids = NULL;
 dev->rte_xstats_ids_size = 0;
 
-dev->tx_retries = 0;
+dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
 
 return 0;
 }
@@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev
*netdev,
 
 uint32_t i;
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-int rte_xstats_ret;
+int rte_xstats_ret, sw_stats_size;
+
+netdev_dpdk_get_sw_custom_stats(netdev, custom_stats);
 
 ovs_mutex_lock(&dev->mutex);
 
@@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev
*netdev,
 if (rte_xstats_ret > 0 &&
 rte_xstats_ret <= dev->rte_xstats_ids_size) {
 
-custom_stats->size = rte_xstats_ret;
-custom_stats->counters =
-(struct netdev_custom_counter *)
xcalloc(rte_xstats_ret,
-sizeof(struct netdev_custom_counter));
+sw_stats_size = custom_stats->size;
+custom_stats->size += rte_xstats_ret;
+custom_stats->counters = xrealloc(custom_stats->counters,
+  custom_stats->size *
+  sizeof 
+ *custom_stats->counters);
 
 for (i = 0; i < rte_xstats_ret; i++) {
-ovs_strlcpy(custom_stats->counters[i].name,
+ovs_strlcpy(custom_stats->counters[sw_stats_size + 
+ i].name,
 netdev_dpdk_get_xstat_name(dev,
 
dev->rte_xstats_ids[i]),
 NETDEV_CUSTOM_STATS_NAME_SIZE);
-custom_stats->counters[i].value = values[i];
+custom_stats->counters[sw_stats_size + i].value = 
+ values[i];
 }
 } else {
 VLOG_WARN("Cannot get XSTATS values for port:
"DPDK_PORT_ID_FMT,
   dev->port_id);
-custom_stats->counters = NULL;
-custom_stats->size = 0;
 /* Let's clear statistics cache, so it will be
  * reconfigured */
 netdev_dpdk_clear_xstats(dev); @@ -2821,39 +2824,47 @@
netdev_dpdk_get_custom_stats(const struct netdev *netdev,  }
 
 static int
-netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev,
-   struct netdev_custom_stats
*custom_stats)
+netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
+struct netdev_custom_stats 
+*custom_stats)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-int i;
+int i, n;
 
-#define VHOST_CSTATS \
-VHOST_CSTAT(tx_retries)
+#define SW_CSTATS \
+SW_CSTAT(tx_retries)
 
-#define VHOST_CSTAT(NAME) + 1
-custom_stats->size = VHOST_CSTATS;
-#undef VHOST_CSTAT
+#define SW_CSTAT(NAME) + 1
+custom_stats->size = SW_CSTATS;
+#undef SW_CSTAT
 custom_stats->counters = xcalloc(custom_stats->size,
  sizeof *custom_stats->counters);
-i = 0;
-#define VHOST_CSTAT(NAME) \
-ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \
-NETDEV_CUSTOM_STATS_NAME_SIZE);
-VHOST_CSTATS;
-#undef VHOST_CSTAT
 
 ovs_mutex_lock(&dev->mutex);
 
 rte_spinlock_lock(&dev->stats_lock);
 i = 0;
-#define VHOST_CSTAT(NAME) \
+#define SW_CSTAT(NAME) \
 custom_stats->counters[i++].value = dev->NAME;
-VHOS

[ovs-dev] [PATCH v11 3/3] ovs-bugtool: Script to collect the port statistics

2019-10-29 Thread Sriram Vatala via dev
Sometimes, analysing the drop statistics of the ports
will be helpful in debugging. This patch adds script
to collect all supported port stats which also includes
the drop counters in userspace datapath. The output of
this script is included in the bugtool output.

Signed-off-by: Sriram Vatala 
---
 utilities/bugtool/automake.mk |  3 ++-
 utilities/bugtool/ovs-bugtool-get-port-stats  | 15 +++
 .../plugins/network-status/openvswitch.xml|  1 +
 3 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100755 utilities/bugtool/ovs-bugtool-get-port-stats

diff --git a/utilities/bugtool/automake.mk b/utilities/bugtool/automake.mk
index 4c85b9cba..0a9b93088 100644
--- a/utilities/bugtool/automake.mk
+++ b/utilities/bugtool/automake.mk
@@ -21,7 +21,8 @@ bugtool_scripts = \
utilities/bugtool/ovs-bugtool-ovs-bridge-datapath-type \
utilities/bugtool/ovs-bugtool-ovs-vswitchd-threads-affinity \
utilities/bugtool/ovs-bugtool-qos-configs \
-   utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa
+   utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa \
+   utilities/bugtool/ovs-bugtool-get-port-stats
 
 scripts_SCRIPTS += $(bugtool_scripts)
 
diff --git a/utilities/bugtool/ovs-bugtool-get-port-stats 
b/utilities/bugtool/ovs-bugtool-get-port-stats
new file mode 100755
index 0..23e61034e
--- /dev/null
+++ b/utilities/bugtool/ovs-bugtool-get-port-stats
@@ -0,0 +1,15 @@
+#! /bin/bash
+
+#Iterate through each port of every bridge and print
+#the port statistics
+
+for bridge in `ovs-vsctl -- --real list-br`
+do
+echo "${bridge} : "
+echo "  ${bridge} : `ovs-vsctl get interface ${bridge} statistics`"
+for iface in `ovs-vsctl list-ifaces ${bridge}`
+do
+echo "  ${iface} : `ovs-vsctl get interface ${iface} statistics`"
+done
+echo -e "\n"
+done
diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml 
b/utilities/bugtool/plugins/network-status/openvswitch.xml
index b0e7a1510..72aa44930 100644
--- a/utilities/bugtool/plugins/network-status/openvswitch.xml
+++ b/utilities/bugtool/plugins/network-status/openvswitch.xml
@@ -41,4 +41,5 @@
 /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges
 "dump-group-stats"
 /usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa
 ip -s -s link 
show
+/usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats
 
-- 
2.20.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v11 2/3] netdev-dpdk : Detailed packet drop statistics

2019-10-29 Thread Sriram Vatala via dev
OVS may be unable to transmit packets for multiple reasons on
the userspace datapath and today there is a single counter to
track packets dropped due to any of those reasons. This patch
adds custom software stats for the different reasons packets
may be dropped during tx/rx on the userspace datapath in OVS.

- MTU drops : drops that occur due to a too large packet size
- Qos drops : drops that occur due to egress/ingress QOS
- Tx failures: drops as returned by the DPDK PMD send function

Note that the reason for tx failures is not specified in OVS.
In practice for vhost ports it is most common that tx failures
are because there are not enough available descriptors,
which is usually caused by misconfiguration of the guest queues
and/or because the guest is not consuming packets fast enough
from the queues.

These counters are displayed along with other stats in
"ovs-vsctl get interface  statistics" command and are
available for dpdk and vhostuser/vhostuserclient ports.

Also the existing "tx_retries" counter for vhost ports has been
renamed to "ovs_tx_retries", so that all the custom statistics
that OVS accumulates itself will have the prefix "ovs_". This
will prevent any custom stats names overlapping with
driver/HW stats.

Signed-off-by: Sriram Vatala 
---
 Documentation/topics/dpdk/bridge.rst |  6 ++
 Documentation/topics/dpdk/vhost-user.rst |  2 +-
 lib/netdev-dpdk.c| 82 +++-
 3 files changed, 72 insertions(+), 18 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index d9bc7eba4..f0ef42ecc 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -75,6 +75,12 @@ OpenFlow14`` option::
 
 $ ovs-ofctl -O OpenFlow14 dump-ports br0
 
+There are custom statistics that OVS accumulates itself and these stats has
+``ovs_`` as prefix. These custom stats are shown along with other stats
+using the following command::
+
+$ ovs-vsctl get Interface  statistics
+
 EMC Insertion Probability
 -
 
diff --git a/Documentation/topics/dpdk/vhost-user.rst 
b/Documentation/topics/dpdk/vhost-user.rst
index cda5b122f..ec0caeb16 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -551,7 +551,7 @@ processing packets at the required rate.
 The amount of Tx retries on a vhost-user or vhost-user-client interface can be
 shown with::
 
-  $ ovs-vsctl get Interface dpdkvhostclient0 statistics:tx_retries
+  $ ovs-vsctl get Interface dpdkvhostclient0 statistics:ovs_tx_retries
 
 vhost-user Dequeue Zero Copy (experimental)
 ---
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 2cc2516a9..6922e61ca 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -174,6 +174,20 @@ static const struct vhost_device_ops virtio_net_device_ops 
=
 .destroy_connection = destroy_connection,
 };
 
+/* Custom software stats for dpdk ports */
+struct netdev_dpdk_sw_stats {
+/* No. of retries when unable to transmit. */
+uint64_t tx_retries;
+/* Packet drops when unable to transmit; Probably Tx queue is full. */
+uint64_t tx_failure_drops;
+/* Packet length greater than device MTU. */
+uint64_t tx_mtu_exceeded_drops;
+/* Packet drops in egress policer processing. */
+uint64_t tx_qos_drops;
+/* Packet drops in ingress policer processing. */
+uint64_t rx_qos_drops;
+};
+
 enum { DPDK_RING_SIZE = 256 };
 BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE));
 enum { DRAIN_TSC = 20ULL };
@@ -416,11 +430,10 @@ struct netdev_dpdk {
 
 PADDED_MEMBERS(CACHE_LINE_SIZE,
 struct netdev_stats stats;
-/* Custom stat for retries when unable to transmit. */
-uint64_t tx_retries;
+struct netdev_dpdk_sw_stats *sw_stats;
 /* Protects stats */
 rte_spinlock_t stats_lock;
-/* 4 pad bytes here. */
+/* 36 pad bytes here. */
 );
 
 PADDED_MEMBERS(CACHE_LINE_SIZE,
@@ -1176,7 +1189,8 @@ common_construct(struct netdev *netdev, dpdk_port_t 
port_no,
 dev->rte_xstats_ids = NULL;
 dev->rte_xstats_ids_size = 0;
 
-dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
+dev->sw_stats = xzalloc(sizeof *dev->sw_stats);
+dev->sw_stats->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
 
 return 0;
 }
@@ -1362,6 +1376,7 @@ common_destruct(struct netdev_dpdk *dev)
 ovs_list_remove(&dev->list_node);
 free(ovsrcu_get_protected(struct ingress_policer *,
   &dev->ingress_policer));
+free(dev->sw_stats);
 ovs_mutex_destroy(&dev->mutex);
 }
 
@@ -2212,6 +2227,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq,
 rte_spinlock_lock(&dev->stats_lock);
 netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets,
  nb_rx, dropped);
+dev->sw_stats->rx_qos_drops += dropped;
 rt

[ovs-dev] [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.

2019-10-29 Thread Sriram Vatala via dev
From: Ilya Maximets 

This is yet another refactoring for upcoming detailed drop stats.
It allows to use single function for all the software calculated
statistics in netdev-dpdk for both vhost and ETH ports.

UINT64_MAX used as a marker for non-supported statistics in a
same way as it's done in bridge.c for common netdev stats.

Co-authored-by: Sriram Vatala 
Cc: Sriram Vatala 
Signed-off-by: Ilya Maximets 
Signed-off-by: Sriram Vatala 
---
 lib/netdev-dpdk.c | 69 +++
 1 file changed, 40 insertions(+), 29 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 04e1a2d1b..2cc2516a9 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -474,6 +474,8 @@ struct netdev_rxq_dpdk {
 static void netdev_dpdk_destruct(struct netdev *netdev);
 static void netdev_dpdk_vhost_destruct(struct netdev *netdev);
 
+static int netdev_dpdk_get_sw_custom_stats(const struct netdev *,
+   struct netdev_custom_stats *);
 static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev);
 
 int netdev_dpdk_get_vid(const struct netdev_dpdk *dev);
@@ -1174,7 +1176,7 @@ common_construct(struct netdev *netdev, dpdk_port_t 
port_no,
 dev->rte_xstats_ids = NULL;
 dev->rte_xstats_ids_size = 0;
 
-dev->tx_retries = 0;
+dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
 
 return 0;
 }
@@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev,
 
 uint32_t i;
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-int rte_xstats_ret;
+int rte_xstats_ret, sw_stats_size;
+
+netdev_dpdk_get_sw_custom_stats(netdev, custom_stats);
 
 ovs_mutex_lock(&dev->mutex);
 
@@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev 
*netdev,
 if (rte_xstats_ret > 0 &&
 rte_xstats_ret <= dev->rte_xstats_ids_size) {
 
-custom_stats->size = rte_xstats_ret;
-custom_stats->counters =
-(struct netdev_custom_counter *) xcalloc(rte_xstats_ret,
-sizeof(struct netdev_custom_counter));
+sw_stats_size = custom_stats->size;
+custom_stats->size += rte_xstats_ret;
+custom_stats->counters = xrealloc(custom_stats->counters,
+  custom_stats->size *
+  sizeof *custom_stats->counters);
 
 for (i = 0; i < rte_xstats_ret; i++) {
-ovs_strlcpy(custom_stats->counters[i].name,
+ovs_strlcpy(custom_stats->counters[sw_stats_size + i].name,
 netdev_dpdk_get_xstat_name(dev,
dev->rte_xstats_ids[i]),
 NETDEV_CUSTOM_STATS_NAME_SIZE);
-custom_stats->counters[i].value = values[i];
+custom_stats->counters[sw_stats_size + i].value = values[i];
 }
 } else {
 VLOG_WARN("Cannot get XSTATS values for port: "DPDK_PORT_ID_FMT,
   dev->port_id);
-custom_stats->counters = NULL;
-custom_stats->size = 0;
 /* Let's clear statistics cache, so it will be
  * reconfigured */
 netdev_dpdk_clear_xstats(dev);
@@ -2821,39 +2824,47 @@ netdev_dpdk_get_custom_stats(const struct netdev 
*netdev,
 }
 
 static int
-netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev,
-   struct netdev_custom_stats *custom_stats)
+netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
+struct netdev_custom_stats *custom_stats)
 {
 struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
-int i;
+int i, n;
 
-#define VHOST_CSTATS \
-VHOST_CSTAT(tx_retries)
+#define SW_CSTATS \
+SW_CSTAT(tx_retries)
 
-#define VHOST_CSTAT(NAME) + 1
-custom_stats->size = VHOST_CSTATS;
-#undef VHOST_CSTAT
+#define SW_CSTAT(NAME) + 1
+custom_stats->size = SW_CSTATS;
+#undef SW_CSTAT
 custom_stats->counters = xcalloc(custom_stats->size,
  sizeof *custom_stats->counters);
-i = 0;
-#define VHOST_CSTAT(NAME) \
-ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \
-NETDEV_CUSTOM_STATS_NAME_SIZE);
-VHOST_CSTATS;
-#undef VHOST_CSTAT
 
 ovs_mutex_lock(&dev->mutex);
 
 rte_spinlock_lock(&dev->stats_lock);
 i = 0;
-#define VHOST_CSTAT(NAME) \
+#define SW_CSTAT(NAME) \
 custom_stats->counters[i++].value = dev->NAME;
-VHOST_CSTATS;
-#undef VHOST_CSTAT
+SW_CSTATS;
+#undef SW_CSTAT
 rte_spinlock_unlock(&dev->stats_lock);
 
 ovs_mutex_unlock(&dev->mutex);
 
+i = 0;
+n = 0;
+#define SW_CSTAT(NAME) \
+if (custom_stats->counters[i].value != UINT64_MAX) {   \
+ovs_strlcpy(custom_stats->counters[n].name, #NAME, \
+ 

Re: [ovs-dev] [PATCH v10 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.

2019-10-29 Thread Sriram Vatala via dev
Thanks Kevin for reviewing and acknowledging the patch. I will correct the 
spelling and send the updated patch.

Thanks & Regards,
Sriram.

-Original Message-
From: Kevin Traynor 
Sent: 25 October 2019 20:36
To: Sriram Vatala ; ovs-dev@openvswitch.org; 
i.maxim...@ovn.org
Cc: Ilya Maximets 
Subject: Re: [PATCH v10 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH 
custom stats.

On 24/10/2019 19:21, Sriram Vatala wrote:
> From: Ilya Maximets 
>
> This is yet another refactoring for upcoming detailed drop stats.
> It allowes to use single function for all the software calculated

s/allowes/allows

> statistics in netdev-dpdk for both vhost and ETH ports.
>
> UINT64_MAX used as a marker for non-supported statistics in a same way
> as it's done in bridge.c for common netdev stats.
>
> Co-authored-by: Sriram Vatala 
> Cc: Sriram Vatala 
> Signed-off-by: Ilya Maximets 
> Signed-off-by: Sriram Vatala 

Acked-by: Kevin Traynor 

> ---
>  lib/netdev-dpdk.c | 69
> +++
>  1 file changed, 40 insertions(+), 29 deletions(-)
>
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index
> 04e1a2d1b..2cc2516a9 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -474,6 +474,8 @@ struct netdev_rxq_dpdk {  static void
> netdev_dpdk_destruct(struct netdev *netdev);  static void
> netdev_dpdk_vhost_destruct(struct netdev *netdev);
>
> +static int netdev_dpdk_get_sw_custom_stats(const struct netdev *,
> +   struct netdev_custom_stats
> +*);
>  static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev);
>
>  int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); @@ -1174,7
> +1176,7 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no,
>  dev->rte_xstats_ids = NULL;
>  dev->rte_xstats_ids_size = 0;
>
> -dev->tx_retries = 0;
> +dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX;
>
>  return 0;
>  }
> @@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev
> *netdev,
>
>  uint32_t i;
>  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -int rte_xstats_ret;
> +int rte_xstats_ret, sw_stats_size;
> +
> +netdev_dpdk_get_sw_custom_stats(netdev, custom_stats);
>
>  ovs_mutex_lock(&dev->mutex);
>
> @@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev 
> *netdev,
>  if (rte_xstats_ret > 0 &&
>  rte_xstats_ret <= dev->rte_xstats_ids_size) {
>
> -custom_stats->size = rte_xstats_ret;
> -custom_stats->counters =
> -(struct netdev_custom_counter *) 
> xcalloc(rte_xstats_ret,
> -sizeof(struct netdev_custom_counter));
> +sw_stats_size = custom_stats->size;
> +custom_stats->size += rte_xstats_ret;
> +custom_stats->counters = xrealloc(custom_stats->counters,
> +  custom_stats->size *
> +  sizeof
> + *custom_stats->counters);
>
>  for (i = 0; i < rte_xstats_ret; i++) {
> -ovs_strlcpy(custom_stats->counters[i].name,
> +ovs_strlcpy(custom_stats->counters[sw_stats_size +
> + i].name,
>  netdev_dpdk_get_xstat_name(dev,
> 
> dev->rte_xstats_ids[i]),
>  NETDEV_CUSTOM_STATS_NAME_SIZE);
> -custom_stats->counters[i].value = values[i];
> +custom_stats->counters[sw_stats_size + i].value =
> + values[i];
>  }
>  } else {
>  VLOG_WARN("Cannot get XSTATS values for port: 
> "DPDK_PORT_ID_FMT,
>dev->port_id);
> -custom_stats->counters = NULL;
> -custom_stats->size = 0;
>  /* Let's clear statistics cache, so it will be
>   * reconfigured */
>  netdev_dpdk_clear_xstats(dev); @@ -2821,39 +2824,47 @@
> netdev_dpdk_get_custom_stats(const struct netdev *netdev,  }
>
>  static int
> -netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev,
> -   struct netdev_custom_stats 
> *custom_stats)
> +netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev,
> +struct netdev_custom_stats
> +*custom_stats)
>  {
>  struct netdev_dpdk *dev = netdev_dpdk_cast(netdev);
> -int i;
> +int i, n;
>
> -#define VHOST_CSTATS \
> -VHOST_CSTAT(tx_retries)
> +#define SW_CSTATS \
> +SW_CSTAT(tx_retries)
>
> -#define VHOST_CSTAT(NAME) + 1
> -custom_stats->size = VHOST_CSTATS;
> -#undef VHOST_CSTAT
> +#define SW_CSTAT(NAME) + 1
> +custom_stats->size = SW_CSTATS;
> +#undef SW_CSTAT
>  custom_stats->counters = xcalloc(custom_stats->size,
>   sizeof *custom_stats->counters);
> -i = 0;
> -#define VHOST_CSTAT(NAME) \
> -ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \
> -  

[ovs-dev] Use more than 200000 flows in datapath/kernel cache flows

2019-10-29 Thread Jerry C.
Sending again with a meaningful subject:)

Dear ovs-dev,

I'm trying to see if I can increase the number of flows supported in
kernel cache. I found two relevant config options
(other_config:flow-limit and upcall/flow limit). My understanding was
that other_config:flow-limit is for the normal openflow flows while
upcall is for the kernel flows
(http://docs.openvswitch.org/en/latest/faq/design/ question one).

It seems I can change the limit for Openflow flows.
This works:
$ovs-vsctl --no-wait set Open_vSwitch . other_config:flow-limit=21

$ovs-vsctl --no-wait get Open_vSwitch . other_config:flow-limit
"21"

However I cannot change the limit for kernel flows.
The set-flow-limit command made a confirmation but the show command
still shows *(limit 20)*
$ovs-appctl upcall/set-flow-limit 21
set flow_limit to 21

$ovs-appctl upcall/show
system@ovs-system:
  flows : (current 70) (avg 68) (max 92) (limit 20)
  dump duration : 3ms
  ufid enabled : true

  127: (keys 3)

...

My question is, is it possible to change the limit on kernel flows and
is upcall the right command?

Thanks,
Jerry
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v5 00/10] optimize openvswitch flow looking up

2019-10-29 Thread David Miller


Date: Sat, 19 Oct 2019 16:08:34 +0800

The date on your postings is in the past.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] (no subject)

2019-10-29 Thread Jerry C.
Dear ovs-dev,

I'm trying to see if I can increase the number of flows supported in
kernel cache. I found two relevant config options
(other_config:flow-limit and upcall/flow limit). My understanding was
that other_config:flow-limit is for the normal openflow flows while
upcall is for the kernel flows
(http://docs.openvswitch.org/en/latest/faq/design/ question one).

It seems I can change the limit for Openflow flows.
This works:
$ovs-vsctl --no-wait set Open_vSwitch . other_config:flow-limit=21

$ovs-vsctl --no-wait get Open_vSwitch . other_config:flow-limit
"21"

However I cannot change the limit for kernel flows.
The set-flow-limit command made a confirmation but the show command
still shows *(limit 20)*
$ovs-appctl upcall/set-flow-limit 21
set flow_limit to 21

$ovs-appctl upcall/show
system@ovs-system:
  flows : (current 70) (avg 68) (max 92) (limit 20)
  dump duration : 3ms
  ufid enabled : true

  127: (keys 3)

...

My question is, is it possible to change the limit on kernel flows and
is upcall the right command?

Thanks,
Jerry
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 10/10] net: openvswitch: simplify the ovs_dp_cmd_new

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

use the specified functions to init resource.

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
---
 net/openvswitch/datapath.c | 60 +-
 1 file changed, 38 insertions(+), 22 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index aeb76e4..4d48e48 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1576,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct 
nlattr *a[])
return 0;
 }
 
+static int ovs_dp_stats_init(struct datapath *dp)
+{
+   dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
+   if (!dp->stats_percpu)
+   return -ENOMEM;
+
+   return 0;
+}
+
+static int ovs_dp_vport_init(struct datapath *dp)
+{
+   int i;
+
+   dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
+ sizeof(struct hlist_head),
+ GFP_KERNEL);
+   if (!dp->ports)
+   return -ENOMEM;
+
+   for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
+   INIT_HLIST_HEAD(&dp->ports[i]);
+
+   return 0;
+}
+
 static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
 {
struct nlattr **a = info->attrs;
@@ -1584,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
struct datapath *dp;
struct vport *vport;
struct ovs_net *ovs_net;
-   int err, i;
+   int err;
 
err = -EINVAL;
if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID])
@@ -1597,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
err = -ENOMEM;
dp = kzalloc(sizeof(*dp), GFP_KERNEL);
if (dp == NULL)
-   goto err_free_reply;
+   goto err_destroy_reply;
 
ovs_dp_set_net(dp, sock_net(skb->sk));
 
/* Allocate table. */
err = ovs_flow_tbl_init(&dp->table);
if (err)
-   goto err_free_dp;
+   goto err_destroy_dp;
 
-   dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu);
-   if (!dp->stats_percpu) {
-   err = -ENOMEM;
+   err = ovs_dp_stats_init(dp);
+   if (err)
goto err_destroy_table;
-   }
 
-   dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS,
- sizeof(struct hlist_head),
- GFP_KERNEL);
-   if (!dp->ports) {
-   err = -ENOMEM;
-   goto err_destroy_percpu;
-   }
-
-   for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++)
-   INIT_HLIST_HEAD(&dp->ports[i]);
+   err = ovs_dp_vport_init(dp);
+   if (err)
+   goto err_destroy_stats;
 
err = ovs_meters_init(dp);
if (err)
-   goto err_destroy_ports_array;
+   goto err_destroy_ports;
 
/* Set up our datapath device. */
parms.name = nla_data(a[OVS_DP_ATTR_NAME]);
@@ -1675,15 +1691,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
 
 err_destroy_meters:
ovs_meters_exit(dp);
-err_destroy_ports_array:
+err_destroy_ports:
kfree(dp->ports);
-err_destroy_percpu:
+err_destroy_stats:
free_percpu(dp->stats_percpu);
 err_destroy_table:
ovs_flow_tbl_destroy(&dp->table);
-err_free_dp:
+err_destroy_dp:
kfree(dp);
-err_free_reply:
+err_destroy_reply:
kfree_skb(reply);
 err:
return err;
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 08/10] net: openvswitch: fix possible memleak on destroy flow-table

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

When we destroy the flow tables which may contain the flow_mask,
so release the flow mask struct.

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
---
 net/openvswitch/flow_table.c | 185 +++
 1 file changed, 97 insertions(+), 88 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 5df5182..a128a7f 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -210,6 +210,74 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, 
int size)
return 0;
 }
 
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+  struct sw_flow_mask *new)
+{
+   struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+   int err, ma_count = READ_ONCE(ma->count);
+
+   if (ma_count >= ma->max) {
+   err = tbl_mask_array_realloc(tbl, ma->max +
+ MASK_ARRAY_SIZE_MIN);
+   if (err)
+   return err;
+
+   ma = ovsl_dereference(tbl->mask_array);
+   }
+
+   BUG_ON(ovsl_dereference(ma->masks[ma_count]));
+
+   rcu_assign_pointer(ma->masks[ma_count], new);
+   WRITE_ONCE(ma->count, ma_count +1);
+
+   return 0;
+}
+
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+   struct sw_flow_mask *mask)
+{
+   struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+   int i, ma_count = READ_ONCE(ma->count);
+
+   /* Remove the deleted mask pointers from the array */
+   for (i = 0; i < ma_count; i++) {
+   if (mask == ovsl_dereference(ma->masks[i]))
+   goto found;
+   }
+
+   BUG();
+   return;
+
+found:
+   WRITE_ONCE(ma->count, ma_count -1);
+
+   rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+   RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+   kfree_rcu(mask, rcu);
+
+   /* Shrink the mask array if necessary. */
+   if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+   ma_count <= (ma->max / 3))
+   tbl_mask_array_realloc(tbl, ma->max / 2);
+}
+
+/* Remove 'mask' from the mask list, if it is not needed any more. */
+static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
+{
+   if (mask) {
+   /* ovs-lock is required to protect mask-refcount and
+* mask list.
+*/
+   ASSERT_OVSL();
+   BUG_ON(!mask->ref_count);
+   mask->ref_count--;
+
+   if (!mask->ref_count)
+   tbl_mask_array_del_mask(tbl, mask);
+   }
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
struct table_instance *ti, *ufid_ti;
@@ -257,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu)
__table_instance_destroy(ti);
 }
 
-static void table_instance_destroy(struct table_instance *ti,
+static void table_instance_flow_free(struct flow_table *table,
+ struct table_instance *ti,
+ struct table_instance *ufid_ti,
+ struct sw_flow *flow,
+ bool count)
+{
+   hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
+   if (count)
+   table->count--;
+
+   if (ovs_identifier_is_ufid(&flow->id)) {
+   hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
+
+   if (count)
+   table->ufid_count--;
+   }
+
+   flow_mask_remove(table, flow->mask);
+}
+
+static void table_instance_destroy(struct flow_table *table,
+  struct table_instance *ti,
   struct table_instance *ufid_ti,
   bool deferred)
 {
@@ -274,13 +363,11 @@ static void table_instance_destroy(struct table_instance 
*ti,
struct sw_flow *flow;
struct hlist_head *head = &ti->buckets[i];
struct hlist_node *n;
-   int ver = ti->node_ver;
-   int ufid_ver = ufid_ti->node_ver;
 
-   hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) {
-   hlist_del_rcu(&flow->flow_table.node[ver]);
-   if (ovs_identifier_is_ufid(&flow->id))
-   hlist_del_rcu(&flow->ufid_table.node[ufid_ver]);
+   hlist_for_each_entry_safe(flow, n, head,
+ flow_table.node[ti->node_ver]) {
+   
+   table_instance_flow_free(table, ti, ufid_ti, flow, 
false);
ovs_flow_free(flow, deferred);
}
}
@@ -305,7 +392,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
 
free_percpu(table->mask_cache);
kfree_rcu(rcu_dereference_raw(table->m

[ovs-dev] [PATCH net-next v5 09/10] net: openvswitch: don't unlock mutex when changing the user_features fails

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

Unlocking of a not locked mutex is not allowed.
Other kernel thread may be in critical section while
we unlock it because of setting user_feature fail.

Fixes: 95a7233c4 ("net: openvswitch: Set OvS recirc_id from tc chain index")
Cc: Paul Blakey 
Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/datapath.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9fea7e1..aeb76e4 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1657,6 +1657,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
ovs_dp_reset_user_features(skb, info);
}
 
+   ovs_unlock();
goto err_destroy_meters;
}
 
@@ -1673,7 +1674,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct 
genl_info *info)
return 0;
 
 err_destroy_meters:
-   ovs_unlock();
ovs_meters_exit(dp);
 err_destroy_ports_array:
kfree(dp->ports);
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 06/10] net: openvswitch: simplify the flow_hash

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

Simplify the code and remove the unnecessary BUILD_BUG_ON.

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/flow_table.c | 8 ++--
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index a10d421..3e3d345 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -432,13 +432,9 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table)
 static u32 flow_hash(const struct sw_flow_key *key,
 const struct sw_flow_key_range *range)
 {
-   int key_start = range->start;
-   int key_end = range->end;
-   const u32 *hash_key = (const u32 *)((const u8 *)key + key_start);
-   int hash_u32s = (key_end - key_start) >> 2;
-
+   const u32 *hash_key = (const u32 *)((const u8 *)key + range->start);
/* Make sure number of hash bytes are multiple of u32. */
-   BUILD_BUG_ON(sizeof(long) % sizeof(u32));
+   int hash_u32s = range_n_bytes(range) >> 2;
 
return jhash2(hash_key, hash_u32s, 0);
 }
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 07/10] net: openvswitch: add likely in flow_lookup

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

The most case *index < ma->max, and flow-mask is not NULL.
We add un/likely for performance.

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/flow_table.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3e3d345..5df5182 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -518,7 +518,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
struct sw_flow_mask *mask;
int i;
 
-   if (*index < ma->max) {
+   if (likely(*index < ma->max)) {
mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
@@ -533,7 +533,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
continue;
 
mask = rcu_dereference_ovsl(ma->masks[i]);
-   if (!mask)
+   if (unlikely(!mask))
break;
 
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 04/10] net: openvswitch: optimize flow mask cache hash collision

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

Port the codes to linux upstream and with little changes.

Pravin B Shelar, says:
| In case hash collision on mask cache, OVS does extra flow
| lookup. Following patch avoid it.

Link: 
https://github.com/openvswitch/ovs/commit/0e6efbe2712da03522532dc5e84806a96f6a0dd1
Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
---
 net/openvswitch/flow_table.c | 95 
 1 file changed, 53 insertions(+), 42 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 237cf85..8d4f50d 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -508,6 +508,9 @@ static struct sw_flow *masked_flow_lookup(struct 
table_instance *ti,
return NULL;
 }
 
+/* Flow lookup does full lookup on flow table. It starts with
+ * mask from index passed in *index.
+ */
 static struct sw_flow *flow_lookup(struct flow_table *tbl,
   struct table_instance *ti,
   struct mask_array *ma,
@@ -516,18 +519,31 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
   u32 *index)
 {
struct sw_flow *flow;
+   struct sw_flow_mask *mask;
int i;
 
-   for (i = 0; i < ma->max; i++)  {
-   struct sw_flow_mask *mask;
-
-   mask = rcu_dereference_ovsl(ma->masks[i]);
+   if (*index < ma->max) {
+   mask = rcu_dereference_ovsl(ma->masks[*index]);
if (mask) {
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
-   if (flow) { /* Found */
-   *index = i;
+   if (flow)
return flow;
-   }
+   }
+   }
+
+   for (i = 0; i < ma->max; i++)  {
+
+   if (i == *index)
+   continue;
+
+   mask = rcu_dereference_ovsl(ma->masks[i]);
+   if (!mask)
+   continue;
+
+   flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
+   if (flow) { /* Found */
+   *index = i;
+   return flow;
}
}
 
@@ -546,58 +562,54 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct 
flow_table *tbl,
  u32 skb_hash,
  u32 *n_mask_hit)
 {
-   struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array);
-   struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
-   struct mask_cache_entry  *entries, *ce, *del;
+   struct mask_array *ma = rcu_dereference(tbl->mask_array);
+   struct table_instance *ti = rcu_dereference(tbl->ti);
+   struct mask_cache_entry *entries, *ce;
struct sw_flow *flow;
-   u32 hash = skb_hash;
+   u32 hash;
int seg;
 
*n_mask_hit = 0;
if (unlikely(!skb_hash)) {
-   u32 __always_unused mask_index;
+   u32 mask_index = 0;
 
return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index);
}
 
-   del = NULL;
+   /* Pre and post recirulation flows usually have the same skb_hash
+* value. To avoid hash collisions, rehash the 'skb_hash' with
+* 'recirc_id'.  */
+   if (key->recirc_id)
+   skb_hash = jhash_1word(skb_hash, key->recirc_id);
+
+   ce = NULL;
+   hash = skb_hash;
entries = this_cpu_ptr(tbl->mask_cache);
 
+   /* Find the cache entry 'ce' to operate on. */
for (seg = 0; seg < MC_HASH_SEGS; seg++) {
-   int index;
-
-   index = hash & (MC_HASH_ENTRIES - 1);
-   ce = &entries[index];
-
-   if (ce->skb_hash == skb_hash) {
-   struct sw_flow_mask *mask;
-   struct sw_flow *flow;
-
-   mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]);
-   if (mask) {
-   flow = masked_flow_lookup(ti, key, mask,
- n_mask_hit);
-   if (flow)  /* Found */
-   return flow;
-   }
-
-   del = ce;
-   break;
+   int index = hash & (MC_HASH_ENTRIES - 1);
+   struct mask_cache_entry *e;
+
+   e = &entries[index];
+   if (e->skb_hash == skb_hash) {
+   flow = flow_lookup(tbl, ti, ma, key, n_mask_hit,
+  &e->mask_index);
+   if (!flow)
+   e->skb_hash = 0;
+   return flow;
}
 
-   if (!del || (del->skb_hash && !ce->skb_hash) ||
-   (rcu_dereference_ovsl(ma->masks[del->

[ovs-dev] [PATCH net-next v5 05/10] net: openvswitch: optimize flow-mask looking up

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

The full looking up on flow table traverses all mask array.
If mask-array is too large, the number of invalid flow-mask
increase, performance will be drop.

One bad case, for example: M means flow-mask is valid and NULL
of flow-mask means deleted.

+---+
| M | NULL | ...  | NULL | M|
+---+

In that case, without this patch, openvswitch will traverses all
mask array, because there will be one flow-mask in the tail. This
patch changes the way of flow-mask inserting and deleting, and the
mask array will be keep as below: there is not a NULL hole. In the
fast path, we can "break" "for" (not "continue") in flow_lookup
when we get a NULL flow-mask.

 "break"
v
+---+
| M | M |  NULL |...   | NULL | NULL|
+---+

This patch don't optimize slow or control path, still using ma->max
to traverse. Slow path:
* tbl_mask_array_realloc
* ovs_flow_tbl_lookup_exact
* flow_mask_find

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
---
 net/openvswitch/flow_table.c | 101 ++-
 1 file changed, 52 insertions(+), 49 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 8d4f50d..a10d421 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl,
 
mask = rcu_dereference_ovsl(ma->masks[i]);
if (!mask)
-   continue;
+   break;
 
flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
if (flow) { /* Found */
@@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table 
*tbl,
 int ovs_flow_tbl_num_masks(const struct flow_table *table)
 {
struct mask_array *ma = rcu_dereference_ovsl(table->mask_array);
-   return ma->count;
+   return READ_ONCE(ma->count);
 }
 
 static struct table_instance *table_instance_expand(struct table_instance *ti,
@@ -704,21 +704,33 @@ static struct table_instance 
*table_instance_expand(struct table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
-static void tbl_mask_array_delete_mask(struct mask_array *ma,
-  struct sw_flow_mask *mask)
+static void tbl_mask_array_del_mask(struct flow_table *tbl,
+   struct sw_flow_mask *mask)
 {
-   int i;
+   struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+   int i, ma_count = READ_ONCE(ma->count);
 
/* Remove the deleted mask pointers from the array */
-   for (i = 0; i < ma->max; i++) {
-   if (mask == ovsl_dereference(ma->masks[i])) {
-   RCU_INIT_POINTER(ma->masks[i], NULL);
-   ma->count--;
-   kfree_rcu(mask, rcu);
-   return;
-   }
+   for (i = 0; i < ma_count; i++) {
+   if (mask == ovsl_dereference(ma->masks[i]))
+   goto found;
}
+
BUG();
+   return;
+
+found:
+   WRITE_ONCE(ma->count, ma_count -1);
+
+   rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]);
+   RCU_INIT_POINTER(ma->masks[ma_count -1], NULL);
+
+   kfree_rcu(mask, rcu);
+
+   /* Shrink the mask array if necessary. */
+   if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+   ma_count <= (ma->max / 3))
+   tbl_mask_array_realloc(tbl, ma->max / 2);
 }
 
 /* Remove 'mask' from the mask list, if it is not needed any more. */
@@ -732,17 +744,8 @@ static void flow_mask_remove(struct flow_table *tbl, 
struct sw_flow_mask *mask)
BUG_ON(!mask->ref_count);
mask->ref_count--;
 
-   if (!mask->ref_count) {
-   struct mask_array *ma;
-
-   ma = ovsl_dereference(tbl->mask_array);
-   tbl_mask_array_delete_mask(ma, mask);
-
-   /* Shrink the mask array if necessary. */
-   if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
-   ma->count <= (ma->max / 3))
-   tbl_mask_array_realloc(tbl, ma->max / 2);
-   }
+   if (!mask->ref_count)
+   tbl_mask_array_del_mask(tbl, mask);
}
 }
 
@@ -806,6 +809,29 @@ static struct sw_flow_mask *flow_mask_find(const struct 
flow_table *tbl,
return NULL;
 }
 
+static int tbl_mask_array_add_mask(struct flow_table *tbl,
+  struct sw_flow_mask *new)
+{
+   struct mask_array *ma = ovsl_dereference(tbl->mask_array);
+   int err, ma_count = READ_ONCE(ma->count);
+
+   if (ma_count >= ma->max) {
+   err = tbl_mask_array_real

[ovs-dev] [PATCH net-next v5 03/10] net: openvswitch: shrink the mask array if necessary

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

When creating and inserting flow-mask, if there is no available
flow-mask, we realloc the mask array. When removing flow-mask,
if necessary, we shrink mask array.

Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/flow_table.c | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 0d1df53..237cf85 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -693,6 +693,23 @@ static struct table_instance *table_instance_expand(struct 
table_instance *ti,
return table_instance_rehash(ti, ti->n_buckets * 2, ufid);
 }
 
+static void tbl_mask_array_delete_mask(struct mask_array *ma,
+  struct sw_flow_mask *mask)
+{
+   int i;
+
+   /* Remove the deleted mask pointers from the array */
+   for (i = 0; i < ma->max; i++) {
+   if (mask == ovsl_dereference(ma->masks[i])) {
+   RCU_INIT_POINTER(ma->masks[i], NULL);
+   ma->count--;
+   kfree_rcu(mask, rcu);
+   return;
+   }
+   }
+   BUG();
+}
+
 /* Remove 'mask' from the mask list, if it is not needed any more. */
 static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask)
 {
@@ -706,18 +723,14 @@ static void flow_mask_remove(struct flow_table *tbl, 
struct sw_flow_mask *mask)
 
if (!mask->ref_count) {
struct mask_array *ma;
-   int i;
 
ma = ovsl_dereference(tbl->mask_array);
-   for (i = 0; i < ma->max; i++) {
-   if (mask == ovsl_dereference(ma->masks[i])) {
-   RCU_INIT_POINTER(ma->masks[i], NULL);
-   ma->count--;
-   kfree_rcu(mask, rcu);
-   return;
-   }
-   }
-   BUG();
+   tbl_mask_array_delete_mask(ma, mask);
+
+   /* Shrink the mask array if necessary. */
+   if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) &&
+   ma->count <= (ma->max / 3))
+   tbl_mask_array_realloc(tbl, ma->max / 2);
}
}
 }
-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 02/10] net: openvswitch: convert mask list in mask array

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

Port the codes to linux upstream and with little changes.

Pravin B Shelar, says:
| mask caches index of mask in mask_list. On packet recv OVS
| need to traverse mask-list to get cached mask. Therefore array
| is better for retrieving cached mask. This also allows better
| cache replacement algorithm by directly checking mask's existence.

Link: 
https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5
Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/flow.h   |   1 -
 net/openvswitch/flow_table.c | 210 ---
 net/openvswitch/flow_table.h |   8 +-
 3 files changed, 167 insertions(+), 52 deletions(-)

diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index b830d5f..8080518 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -166,7 +166,6 @@ struct sw_flow_key_range {
 struct sw_flow_mask {
int ref_count;
struct rcu_head rcu;
-   struct list_head list;
struct sw_flow_key_range range;
struct sw_flow_key key;
 };
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 3d515c0..0d1df53 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -34,6 +34,7 @@
 #include 
 
 #define TBL_MIN_BUCKETS1024
+#define MASK_ARRAY_SIZE_MIN16
 #define REHASH_INTERVAL(10 * 60 * HZ)
 
 #define MC_HASH_SHIFT  8
@@ -168,9 +169,51 @@ static struct table_instance *table_instance_alloc(int 
new_size)
return ti;
 }
 
+static struct mask_array *tbl_mask_array_alloc(int size)
+{
+   struct mask_array *new;
+
+   size = max(MASK_ARRAY_SIZE_MIN, size);
+   new = kzalloc(sizeof(struct mask_array) +
+ sizeof(struct sw_flow_mask *) * size, GFP_KERNEL);
+   if (!new)
+   return NULL;
+
+   new->count = 0;
+   new->max = size;
+
+   return new;
+}
+
+static int tbl_mask_array_realloc(struct flow_table *tbl, int size)
+{
+   struct mask_array *old;
+   struct mask_array *new;
+
+   new = tbl_mask_array_alloc(size);
+   if (!new)
+   return -ENOMEM;
+
+   old = ovsl_dereference(tbl->mask_array);
+   if (old) {
+   int i;
+
+   for (i = 0; i < old->max; i++) {
+   if (ovsl_dereference(old->masks[i]))
+   new->masks[new->count++] = old->masks[i];
+   }
+   }
+
+   rcu_assign_pointer(tbl->mask_array, new);
+   kfree_rcu(old, rcu);
+
+   return 0;
+}
+
 int ovs_flow_tbl_init(struct flow_table *table)
 {
struct table_instance *ti, *ufid_ti;
+   struct mask_array *ma;
 
table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
   MC_HASH_ENTRIES,
@@ -178,9 +221,13 @@ int ovs_flow_tbl_init(struct flow_table *table)
if (!table->mask_cache)
return -ENOMEM;
 
+   ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN);
+   if (!ma)
+   goto free_mask_cache;
+
ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
-   goto free_mask_cache;
+   goto free_mask_array;
 
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -188,7 +235,7 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
rcu_assign_pointer(table->ti, ti);
rcu_assign_pointer(table->ufid_ti, ufid_ti);
-   INIT_LIST_HEAD(&table->mask_list);
+   rcu_assign_pointer(table->mask_array, ma);
table->last_rehash = jiffies;
table->count = 0;
table->ufid_count = 0;
@@ -196,6 +243,8 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
 free_ti:
__table_instance_destroy(ti);
+free_mask_array:
+   kfree(ma);
 free_mask_cache:
free_percpu(table->mask_cache);
return -ENOMEM;
@@ -255,6 +304,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
free_percpu(table->mask_cache);
+   kfree_rcu(rcu_dereference_raw(table->mask_array), rcu);
table_instance_destroy(ti, ufid_ti, false);
 }
 
@@ -460,17 +510,27 @@ static struct sw_flow *masked_flow_lookup(struct 
table_instance *ti,
 
 static struct sw_flow *flow_lookup(struct flow_table *tbl,
   struct table_instance *ti,
+  struct mask_array *ma,
   const struct sw_flow_key *key,
-  u32 *n_mask_hit)
+  u32 *n_mask_hit,
+  u32 *index)
 {
-   struct sw_flow_mask *mask;
struct sw_flow *flow;
+   int i;
 
-   list_for_each_entry_rcu(mask, &tbl->mask_list, list) {
-   flow = masked_flow_lookup(ti, key, mask, n_mask_hit);
-   

[ovs-dev] [PATCH net-next v5 00/10] optimize openvswitch flow looking up

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

This series patch optimize openvswitch for performance or simplify
codes.

Patch 1, 2, 4: Port Pravin B Shelar patches to
linux upstream with little changes.

Patch 5, 6, 7: Optimize the flow looking up and
simplify the flow hash.

Patch 8, 9: are bugfix.

The performance test is on Intel Xeon E5-2630 v4.
The test topology is show as below:

+---+
|   +---+   |
|   | eth0   ovs-switcheth1 |   | Host0
|   +---+   |
+---+
  ^   |
  |   |
  |   |
  |   |
  |   v
+-++ ++-+
| netperf  | Host1   | netserver| Host2
+--+ +--+

We use netperf send the 64B packets, and insert 255+ flow-mask:
$ ovs-dpctl add-flow ovs-switch 
"in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)"
 2
...
$ ovs-dpctl add-flow ovs-switch 
"in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)"
 2
$
$ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18

* Without series patch, throughput 8.28Mbps
* With series patch, throughput 46.05Mbps

v5:
rewrite patch 8, release flow-mask when freeing flow

v4:
access ma->count with READ_ONCE/WRITE_ONCE API. More information,
see patch 5 comments. 

v3:
update ma point when realloc mask_array in patch 5

v2:
simplify codes. e.g. use kfree_rcu instead of call_rcu

Tonghao Zhang (10):
  net: openvswitch: add flow-mask cache for performance
  net: openvswitch: convert mask list in mask array
  net: openvswitch: shrink the mask array if necessary
  net: openvswitch: optimize flow mask cache hash collision
  net: openvswitch: optimize flow-mask looking up
  net: openvswitch: simplify the flow_hash
  net: openvswitch: add likely in flow_lookup
  net: openvswitch: fix possible memleak on destroy flow-table
  net: openvswitch: don't unlock mutex when changing the user_features
fails
  net: openvswitch: simplify the ovs_dp_cmd_new

 net/openvswitch/datapath.c   |  65 +---
 net/openvswitch/flow.h   |   1 -
 net/openvswitch/flow_table.c | 381 ++-
 net/openvswitch/flow_table.h |  19 ++-
 4 files changed, 360 insertions(+), 106 deletions(-)

-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net-next v5 01/10] net: openvswitch: add flow-mask cache for performance

2019-10-29 Thread xiangxia . m . yue
From: Tonghao Zhang 

The idea of this optimization comes from a patch which
is committed in 2014, openvswitch community. The author
is Pravin B Shelar. In order to get high performance, I
implement it again. Later patches will use it.

Pravin B Shelar, says:
| On every packet OVS needs to lookup flow-table with every
| mask until it finds a match. The packet flow-key is first
| masked with mask in the list and then the masked key is
| looked up in flow-table. Therefore number of masks can
| affect packet processing performance.

Link: 
https://github.com/openvswitch/ovs/commit/5604935e4e1cbc16611d2d97f50b717aa31e8ec5
Signed-off-by: Tonghao Zhang 
Tested-by: Greg Rose 
Acked-by: William Tu 
---
 net/openvswitch/datapath.c   |   3 +-
 net/openvswitch/flow_table.c | 109 +--
 net/openvswitch/flow_table.h |  11 -
 3 files changed, 107 insertions(+), 16 deletions(-)

diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index f30e406..9fea7e1 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct 
sw_flow_key *key)
stats = this_cpu_ptr(dp->stats_percpu);
 
/* Look up flow. */
-   flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit);
+   flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb),
+&n_mask_hit);
if (unlikely(!flow)) {
struct dp_upcall_info upcall;
 
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index cf3582c..3d515c0 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -36,6 +36,10 @@
 #define TBL_MIN_BUCKETS1024
 #define REHASH_INTERVAL(10 * 60 * HZ)
 
+#define MC_HASH_SHIFT  8
+#define MC_HASH_ENTRIES(1u << MC_HASH_SHIFT)
+#define MC_HASH_SEGS   ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT)
+
 static struct kmem_cache *flow_cache;
 struct kmem_cache *flow_stats_cache __read_mostly;
 
@@ -168,10 +172,15 @@ int ovs_flow_tbl_init(struct flow_table *table)
 {
struct table_instance *ti, *ufid_ti;
 
-   ti = table_instance_alloc(TBL_MIN_BUCKETS);
+   table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) *
+  MC_HASH_ENTRIES,
+  __alignof__(struct 
mask_cache_entry));
+   if (!table->mask_cache)
+   return -ENOMEM;
 
+   ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ti)
-   return -ENOMEM;
+   goto free_mask_cache;
 
ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS);
if (!ufid_ti)
@@ -187,6 +196,8 @@ int ovs_flow_tbl_init(struct flow_table *table)
 
 free_ti:
__table_instance_destroy(ti);
+free_mask_cache:
+   free_percpu(table->mask_cache);
return -ENOMEM;
 }
 
@@ -243,6 +254,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table)
struct table_instance *ti = rcu_dereference_raw(table->ti);
struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti);
 
+   free_percpu(table->mask_cache);
table_instance_destroy(ti, ufid_ti, false);
 }
 
@@ -425,7 +437,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow 
*flow,
 
 static struct sw_flow *masked_flow_lookup(struct table_instance *ti,
  const struct sw_flow_key *unmasked,
- const struct sw_flow_mask *mask)
+ const struct sw_flow_mask *mask,
+ u32 *n_mask_hit)
 {
struct sw_flow *flow;
struct hlist_head *head;
@@ -435,6 +448,8 @@ static struct sw_flow *masked_flow_lookup(struct 
table_instance *ti,
ovs_flow_mask_key(&masked_key, unmasked, false, mask);
hash = flow_hash(&masked_key, &mask->range);
head = find_bucket(ti, hash);
+   (*n_mask_hit)++;
+
hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) {
if (flow->mask == mask && flow->flow_table.hash == hash &&
flow_cmp_masked_key(flow, &masked_key, &mask->range))
@@ -443,30 +458,97 @@ static struct sw_flow *masked_flow_lookup(struct 
table_instance *ti,
return NULL;
 }
 
-struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl,
-   const struct sw_flow_key *key,
-   u32 *n_mask_hit)
+static struct sw_flow *flow_lookup(struct flow_table *tbl,
+  struct table_instance *ti,
+  const struct sw_flow_key *key,
+  u32 *n_mask_hit)
 {
-   struct table_instance *ti = rcu_dereference_ovsl(tbl->ti);
struct sw_flow_mask *mask;
struct sw_flow *flow;
 
-   *n

[ovs-dev] [HELP] Question about userspace meter

2019-10-29 Thread txfh2007 via dev
Hi all:
I have been testing OVS-DPDK Rate limit using meter table these days, but I 
have found the rate deviation is larger than kernel meter. My test result is as 
below:
   case 1. No Rate limit : Iperf between two VMs(on the same compute node) TCP 
bandwidth is above 1G 
   case 2. Rate limit: 100Mbps, burst 80Mbps , Iperf bandwith is around 80Mbps
   case 3.Rate limit: 100Mbps, burst 100Mbps , Iperf bandwith is also around 
80Mbps

   the meter setting of case 2 and 3 is as below:
   case2 : meter=1 kbps burst stats bands= type=drop rate=10 
burst_size=8
   case3 : meter=1 kbps burst stats bands= type=drop rate=10 
burst_size=10

   the dp->meters and meter bands parameter is as below:
   case 2:
   (gdb) p *$5
  $6 = {flags = 13, n_bands = 1, max_delta_t = 800, used = 
7071330038973, packet_count = 1150879, byte_count = 1652639486, bands = 
0x12adf70}
  (gdb) p $5->bands[0]
  $7 = {up = {type = 1, prec_level = 0 '\000', rate = 10, 
burst_size = 8000}, bucket = 9712, packet_count = 125916, byte_count = 
180814928}
case 3:
  $3 = {flags = 13, n_bands = 1, max_delta_t = 1000, used = 
7072663887432, packet_count = 6209035, byte_count = 8916116275, bands = 
0x13c1990}
(gdb) p $2->bands[0]
  $4 = {up = {type = 1, prec_level = 0 '\000', rate = 10, 
burst_size = 1}, bucket = 489312, packet_count = 728789, byte_count = 
1046541004}

I don't know the reason of the test deviation and how to set burst size ? Can 
anyone give me a simple explanation of the relationship between rate and burst 
value ? 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv2] netdev-afxdp: Enable loading XDP program.

2019-10-29 Thread William Tu
Hi Eelco,

Thanks for your review.

On Tue, Oct 29, 2019 at 8:28 AM Eelco Chaudron  wrote:
>
> See comments inline…
>
> On 5 Oct 2019, at 2:12, William Tu wrote:
>
> > Now netdev-afxdp always forwards all packets to userspace because
> > it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
> > There are some cases when users want to keep packets in kernel instead
> > of sending to userspace, for example, management traffic such as SSH
> > should be processed in kernel.
> >
> > The patch enables loading the user-provide XDP program by doing
> >   $ovs-vsctl -- set int afxdp-p0 options:xdpobj=
>
> Should we maybe change the name to xdp-obj?

Yes, will fix it next version.

>
> > So users can implement their filtering logic or traffic steering idea
> > in their XDP program, and rest of the traffic passes to AF_XDP socket
> > handled by OVS.
> >
> > Signed-off-by: William Tu 
> > ---
> > v2:
> > A couple fixes and remove RFC
> > ---
> >  Documentation/intro/install/afxdp.rst |  46 
> >  lib/netdev-afxdp.c| 137
> > ++
> >  lib/netdev-linux-private.h|   2 +
> >  3 files changed, 169 insertions(+), 16 deletions(-)
> >
> > diff --git a/Documentation/intro/install/afxdp.rst
> > b/Documentation/intro/install/afxdp.rst
> > index 820e9d993d8f..f484bdb8249b 100644
> > --- a/Documentation/intro/install/afxdp.rst
> > +++ b/Documentation/intro/install/afxdp.rst
> > @@ -265,6 +265,52 @@ Or, use OVS pmd tool::
> >ovs-appctl dpif-netdev/pmd-stats-show
> >
> >
> > +Loading Custom XDP Program
> > +--
> > +By defailt, netdev-afxdp always forwards all packets to userspace
> > because
> > +it is using libbpf's default XDP program. There are some cases when
> > users
> > +want to keep packets in kernel instead of sending to userspace, for
> > example,
> > +management traffic such as SSH should be processed in kernel. This
> > can be
> > +done by loading the user-provided XDP program::
> > +
> > +  ovs-vsctl -- set int afxdp-p0 options:xdpobj=
> > +
> > +So users can implement their filtering logic or traffic steering idea
> > +in their XDP program, and rest of the traffic passes to AF_XDP socket
> > +handled by OVS. Below is a sample C program compiled under kernel's
> > +samples/bpf/.
> > +
> > +.. code-block:: c
> > +
> > +  #include 
> > +  #include "bpf_helpers.h"
> > +
> > +  /* OVS will associate map 'xsks_map' to xsk socket. */
> > +  struct bpf_map_def SEC("maps") xsks_map = {
> > +  .type = BPF_MAP_TYPE_XSKMAP,
> > +  .key_size = sizeof(int),
> > +  .value_size = sizeof(int),
> > +  .max_entries = 32,
> > +  };
> > +
>
> Maybe something like this could be added to make sure it compiles with
> older kernels:
>
> #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)
>

Good point, I will add it.

> /* Kernel version before 5.3 needed an additional map */
> struct bpf_map_def SEC("maps") qidconf_map = {
> .type = BPF_MAP_TYPE_ARRAY,
> .key_size = sizeof(int),
> .value_size = sizeof(int),
> .max_entries = 64,
> };
> #endi
>
> See also here:
> https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c
>
> > +  SEC("xdp_sock")
> > +  int xdp_sock_prog(struct xdp_md *ctx)
> > +  {
> > +  int index = ctx->rx_queue_index;
> > +
> > +  /* Customized by user.
> > +   * For example
> > +   * 1) filter out all SSH traffic and return XDP_PASS
> > +   *for kernel to process.
> > +   * 2) Drop unwanted packet by returning XDP_DROP.
> > +   */
> > +
> > +  /* Rest of packets goes to AF_XDP. */
> > +  return bpf_redirect_map(&xsks_map, index, 0);
> > +  }
> > +  char _license[] SEC("license") = "GPL";
> > +
> > +
> >  Example Script
> >  --
> >
> > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
> > index 6e01803272aa..231d6473fb67 100644
> > --- a/lib/netdev-afxdp.c
> > +++ b/lib/netdev-afxdp.c
> > @@ -21,6 +21,7 @@
> >  #include "netdev-afxdp.h"
> >  #include "netdev-afxdp-pool.h"
> >
> > +#include 
> >  #include 
> >  #include 
> >  #include 
> > @@ -29,6 +30,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> > @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
> >  #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char
> > *)base))
> >
> >  static struct xsk_socket_info *xsk_configure(int ifindex, int
> > xdp_queue_id,
> > - int mode);
> > + int mode, bool
> > use_default_xdp);
> >  static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
> >  static void xsk_destroy(struct xsk_socket_info *xsk);
> >  static int xsk_configure_all(struct netdev *netdev);
> > @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux
> > OVS_UNUSED)
> >  ovs_mutex_unlock(&unused_pools_mutex);
> >  }
> >
> > +static int
> > +xsk_load

Re: [ovs-dev] [PATCH ovn 4/6] northd: Add lflows for IPv6 NAT.

2019-10-29 Thread 0-day Robot
Bleep bloop.  Greetings Russell Bryant, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 82 characters long (recommended limit is 79)
#86 FILE: northd/ovn-northd.c:6095:
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
1);

WARNING: Line is 82 characters long (recommended limit is 79)
#192 FILE: northd/ovn-northd.c:6980:
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
1);

WARNING: Line is 83 characters long (recommended limit is 79)
#247 FILE: northd/ovn-northd.c:7026:
  "inport == %s && arp.tpa == "IP_FMT" && arp.op == 
1",

WARNING: Line is 81 characters long (recommended limit is 79)
#367 FILE: northd/ovn-northd.c:7176:
&& op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j].ipv4) 
{

WARNING: Line is 82 characters long (recommended limit is 79)
#470 FILE: northd/ovn-northd.c:7420:
static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
1);

WARNING: Line is 81 characters long (recommended limit is 79)
#476 FILE: northd/ovn-northd.c:7426:
/* It was an invalid IPv4 address, but valid IPv6. Treat the 
rest

WARNING: Line is 123 characters long (recommended limit is 79)
#500 FILE: northd/ovn-northd.c:7450:
if (error || (!is_v6 && mask != OVS_BE32_MAX) || (is_v6 && 
memcmp(&mask_v6, &v6_exact, sizeof(mask_v6 {

Lines checked: 687, Warnings: 7, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added

2019-10-29 Thread 0-day Robot
Bleep bloop.  Greetings Russell Bryant, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 81 characters long (recommended limit is 79)
#154 FILE: utilities/ovn-nbctl.c:3876:
ctl_error(ctx, "%s: Not a valid IPv4 or IPv6 address.", 
external_ip);

Lines checked: 207, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 2/6] actions: Add IPv6 support to lflow NAT actions

2019-10-29 Thread 0-day Robot
Bleep bloop.  Greetings Russell Bryant, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 85 characters long (recommended limit is 79)
#172 FILE: utilities/ovn-trace.c:1902:
ds_put_format(&s, "(ip4.%s="IP_FMT")", direction, 
IP_ARGS(ct_nat->ipv4));

Lines checked: 183, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 5/6] system-ovn: Add IPv6 NAT test cases

2019-10-29 Thread Russell Bryant
These tests failed prior to the changes leading up to this one.

Signed-off-by: Russell Bryant 
---
 tests/system-ovn.at | 862 +++-
 1 file changed, 860 insertions(+), 2 deletions(-)

diff --git a/tests/system-ovn.at b/tests/system-ovn.at
index f88ad31e4..b3f90aae2 100644
--- a/tests/system-ovn.at
+++ b/tests/system-ovn.at
@@ -176,6 +176,186 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port 
patch-.*/d
 /connection dropped.*/d"])
 AT_CLEANUP
 
+AT_SETUP([ovn -- 2 LRs connected via LS, gateway router, SNAT and DNAT - IPv6])
+AT_KEYWORDS([ovnnat])
+
+CHECK_CONNTRACK()
+CHECK_CONNTRACK_NAT()
+ovn_start
+OVS_TRAFFIC_VSWITCHD_START()
+ADD_BR([br-int])
+
+# Set external-ids in br-int needed for ovn-controller
+ovs-vsctl \
+-- set Open_vSwitch . external-ids:system-id=hv1 \
+-- set Open_vSwitch . 
external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \
+-- set Open_vSwitch . external-ids:ovn-encap-type=geneve \
+-- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \
+-- set bridge br-int fail-mode=secure other-config:disable-in-band=true
+
+# Start ovn-controller
+start_daemon ovn-controller
+
+# Logical network:
+# Two LRs - R1 and R2 that are connected to each other via LS "join"
+# in fd00::/64 network. R1 has switchess foo (fd11::/64) and
+# bar (fd12::/64) connected to it. R2 has alice (fd21::/64) connected
+# to it.  R2 is a gateway router on which we add NAT rules.
+#
+#foo -- R1 -- join - R2 -- alice
+#   |
+#bar 
+
+ovn-nbctl create Logical_Router name=R1
+ovn-nbctl create Logical_Router name=R2 options:chassis=hv1
+
+ovn-nbctl ls-add foo
+ovn-nbctl ls-add bar
+ovn-nbctl ls-add alice
+ovn-nbctl ls-add join
+
+# Connect foo to R1
+ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64
+ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \
+type=router options:router-port=foo addresses=\"00:00:01:01:02:03\"
+
+# Connect bar to R1
+ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64
+ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \
+type=router options:router-port=bar addresses=\"00:00:01:01:02:04\"
+
+# Connect alice to R2
+ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 fd21::1/64
+ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \
+type=router options:router-port=alice addresses=\"00:00:02:01:02:03\"
+
+# Connect R1 to join
+ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 fd00::1/64
+ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \
+type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"'
+
+# Connect R2 to join
+ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 fd00::2/64
+ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \
+type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"'
+
+# Static routes.
+ovn-nbctl lr-route-add R1 fd21::/64 fd00::2
+ovn-nbctl lr-route-add R2 fd11::/64 fd00::1
+ovn-nbctl lr-route-add R2 fd12::/64 fd00::1
+
+# Logical port 'foo1' in switch 'foo'.
+ADD_NAMESPACES(foo1)
+ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \
+ "fd11::1")
+OVS_WAIT_UNTIL([test "$(ip netns exec foo1 ip a | grep fd11::2 | grep 
tentative)" = ""])
+ovn-nbctl lsp-add foo foo1 \
+-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2"
+
+# Logical port 'alice1' in switch 'alice'.
+ADD_NAMESPACES(alice1)
+ADD_VETH(alice1, alice1, br-int, "fd21::2/64", "f0:00:00:01:02:04", \
+ "fd21::1")
+OVS_WAIT_UNTIL([test "$(ip netns exec alice1 ip a | grep fd21::2 | grep 
tentative)" = ""])
+ovn-nbctl lsp-add alice alice1 \
+-- lsp-set-addresses alice1 "f0:00:00:01:02:04 fd21::2"
+
+# Logical port 'bar1' in switch 'bar'.
+ADD_NAMESPACES(bar1)
+ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \
+ "fd12::1")
+OVS_WAIT_UNTIL([test "$(ip netns exec bar1 ip a | grep fd12::2 | grep 
tentative)" = ""])
+ovn-nbctl lsp-add bar bar1 \
+-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2"
+
+# Add a DNAT rule.
+ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=\"fd11::2\" \
+external_ip=\"fd30::2\" -- add logical_router R2 nat @nat
+
+# Add a SNAT rule
+ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=\"fd12::2\" \
+external_ip=\"fd30::1\" -- add logical_router R2 nat @nat
+
+# wait for ovn-controller to catch up.
+ovn-nbctl --wait=hv sync
+OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=fd30::1)'])
+
+# 'alice1' should be able to ping 'foo1' directly.
+NS_CHECK_EXEC([alice1], [ping -6 -v -q -c 3 -i 0.3 -w 2 fd11::2 | 
FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# North-South DNAT: 'alice1' should also be able to ping 'foo1' via fd30::2
+NS_CHECK_EXEC([alice1], [ping -6 -q -c 3 -i 0.3 -w 2 fd30::2 | FORMAT_PING], \
+[0], [dnl
+3 packets transmitted, 3 received, 0% packet loss, time 0ms
+])
+
+# Check conntrack entries.
+AT_CHECK([ovs-appctl dpctl/dump-conntra

[ovs-dev] [PATCH ovn 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added

2019-10-29 Thread Russell Bryant
Signed-off-by: Russell Bryant 
---
 tests/ovn-nbctl.at| 41 
 utilities/ovn-nbctl.c | 48 ---
 2 files changed, 68 insertions(+), 21 deletions(-)

diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
index 01091dd99..43a980bdf 100644
--- a/tests/ovn-nbctl.at
+++ b/tests/ovn-nbctl.at
@@ -407,16 +407,16 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 snatt 30.0.0.2 
192.168.1.2], [1], [],
 [ovn-nbctl: snatt: type must be one of "dnat", "snat" and "dnat_and_snat".
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2a 192.168.1.2], [1], [],
-[ovn-nbctl: 30.0.0.2a: should be an IPv4 address.
+[ovn-nbctl: 30.0.0.2a: Not a valid IPv4 or IPv6 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0 192.168.1.2], [1], [],
-[ovn-nbctl: 30.0.0: should be an IPv4 address.
+[ovn-nbctl: 30.0.0: Not a valid IPv4 or IPv6 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2/24 192.168.1.2], [1], [],
-[ovn-nbctl: 30.0.0.2/24: should be an IPv4 address.
+[ovn-nbctl: 30.0.0.2/24: Not a valid IPv4 or IPv6 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2:80 192.168.1.2], [1], [],
-[ovn-nbctl: 30.0.0.2:80: should be an IPv4 address.
+[ovn-nbctl: 30.0.0.2:80: Not a valid IPv4 or IPv6 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2 192.168.1.2a], [1], [],
 [ovn-nbctl: 192.168.1.2a: should be an IPv4 address or network.
@@ -431,19 +431,19 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2 
192.168.1.2/a], [1], [],
 [ovn-nbctl: 192.168.1.2/a: should be an IPv4 address or network.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2a], [1], [],
-[ovn-nbctl: 192.168.1.2a: should be an IPv4 address.
+[ovn-nbctl: 192.168.1.2a: Not a valid IPv4 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1], [1], [],
-[ovn-nbctl: 192.168.1: should be an IPv4 address.
+[ovn-nbctl: 192.168.1: Not a valid IPv4 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2:80], [1], [],
-[ovn-nbctl: 192.168.1.2:80: should be an IPv4 address.
+[ovn-nbctl: 192.168.1.2:80: Not a valid IPv4 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2/24], [1], [],
-[ovn-nbctl: 192.168.1.2/24: should be an IPv4 address.
+[ovn-nbctl: 192.168.1.2/24: Not a valid IPv4 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.2/24], 
[1], [],
-[ovn-nbctl: 192.168.1.2/24: should be an IPv4 address.
+[ovn-nbctl: 192.168.1.2/24: Not a valid IPv4 address.
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.2 lp0], 
[1], [],
 [ovn-nbctl: lr-nat-add with logical_port must also specify external_mac.
@@ -465,15 +465,23 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 
192.168.1.2 lp0 00:00:
 
 dnl Add snat and dnat
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.1 192.168.1.0/24])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 snat fd01::1 fd11::/64])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.1 192.168.1.2])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat fd01::1 fd11::2])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.1 192.168.1.2])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat fd01::1 fd11::2])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.3 lp0 
00:00:00:01:02:03])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat fd01::2 fd11::3 lp0 
00:00:00:01:02:03])
 AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
 TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC 
LOGICAL_PORT
 dnat 30.0.0.1   192.168.1.2
+dnat fd01::1fd11::2
 dnat_and_snat30.0.0.1   192.168.1.2
 dnat_and_snat30.0.0.2   192.168.1.3   00:00:00:01:02:03
lp0
+dnat_and_snatfd01::1fd11::2
+dnat_and_snatfd01::2fd11::3   00:00:00:01:02:03
lp0
 snat 30.0.0.1   192.168.1.0/24
+snat fd01::1fd11::/64
 ])
 AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.1 192.168.1.0/24], [1], [],
 [ovn-nbctl: 30.0.0.1, 192.168.1.0/24: a NAT with this external_ip and 
logical_ip already exists
@@ -503,17 +511,26 @@ AT_CHECK([ovn-nbctl --may-exist lr-nat-add lr0 
dnat_and_snat 30.0.0.2 192.168.1.
 AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
 TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC 
LOGICAL_PORT
 dnat 30.0.0.1   192.168.1.2
+dnat fd01::1fd11::2
 dnat_and_snat30.0.0.1   192.168.1.2
 dnat_and_snat30.0.0.2   192.168.1.3   00:00:00:04:05:06
lp0
+dnat_and_snatfd01::1fd11::2
+dnat_and_snatfd01::2fd11::3   00:00:00:01:02:03
lp0
 snat 30.0.0.1   192.168.1.0/24
+snat fd01::1fd11::/64
 ])
 AT_CHECK([ovn-nbctl --may-exist lr-nat-add lr0 dnat_and_snat 30.0.0.2 
192.168.1.3])
+AT_CHECK([ovn-nbct

[ovs-dev] [PATCH ovn 6/6] NEWS: Add IPv6 NAT support

2019-10-29 Thread Russell Bryant
Signed-off-by: Russell Bryant 
---
 NEWS | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/NEWS b/NEWS
index 73045d65f..ab2f13318 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,9 @@
+Post-OVS-v2.12.0
+-
+   - OVN was split out from the OVS repository and is now released
+ independently.
+   - Added IPv6 NAT support for OVN routers.
+
 Post-v2.11.0
 -
- DPDK:
-- 
2.21.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 4/6] northd: Add lflows for IPv6 NAT.

2019-10-29 Thread Russell Bryant
Signed-off-by: Russell Bryant 
---
 northd/ovn-northd.c | 376 
 1 file changed, 278 insertions(+), 98 deletions(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index ae81a6944..a10017ba1 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -66,6 +66,15 @@ struct northd_context {
 struct ovsdb_idl_index *sbrec_ip_mcast_by_dp;
 };
 
+/* An IPv4 or IPv6 address */
+struct v46_ip {
+int family;
+union {
+ovs_be32 ipv4;
+struct in6_addr ipv6;
+};
+};
+
 static const char *ovnnb_db;
 static const char *ovnsb_db;
 static const char *unixctl_path;
@@ -2273,6 +2282,15 @@ get_nat_addresses(const struct ovn_port *op, size_t *n)
 break;
 }
 }
+if (!is_router_ip) {
+for (size_t j = 0; j < op->lrp_networks.n_ipv6_addrs; j++) {
+if (!strcmp(nat->external_ip,
+op->lrp_networks.ipv6_addrs[j].addr_s)) {
+is_router_ip = true;
+break;
+}
+}
+}
 
 if (!is_router_ip) {
 ds_put_format(&c_addresses, " %s", nat->external_ip);
@@ -6031,9 +6049,28 @@ add_distributed_nat_routes(struct hmap *lflows, const 
struct ovn_port *op)
 continue;
 }
 
+/* Determine if we need to create IPv4 or IPv6 flows */
+ovs_be32 ip;
+struct in6_addr ipv6;
+int family = AF_INET;
+if (!ip_parse(nat->external_ip, &ip) || !ip) {
+family = AF_INET6;
+if (!ipv6_parse(nat->external_ip, &ipv6)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1);
+VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
+ "for router %s", nat->external_ip, op->key);
+/* We'll create IPv6 flows anyway, but the address
+ * is probably bogus ... */
+}
+}
+
 ds_put_format(&match, "inport == %s && "
-  "ip4.src == %s && ip4.dst == %s",
-   op->json_key, nat->logical_ip, nat->external_ip);
+  "ip%s.src == %s && ip%s.dst == %s",
+   op->json_key,
+   family == AF_INET ? "4" : "6",
+   nat->logical_ip,
+   family == AF_INET ? "4" : "6",
+   nat->external_ip);
 ds_put_format(&actions, "outport = %s; eth.dst = %s; "
   REGBIT_DISTRIBUTED_NAT" = 1; "
   REGBIT_NAT_REDIRECT" = 0; next;",
@@ -6051,17 +6088,37 @@ add_distributed_nat_routes(struct hmap *lflows, const 
struct ovn_port *op)
 !nat2->external_mac || !nat2->external_ip)
 continue;
 
+family = AF_INET;
+if (!ip_parse(nat2->external_ip, &ip) || !ip) {
+family = AF_INET6;
+if (!ipv6_parse(nat2->external_ip, &ipv6)) {
+static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 
1);
+VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration "
+ "for router %s", nat2->external_ip, op->key);
+/* We'll create IPv6 flows anyway, but the address
+ * is probably bogus ... */
+}
+}
+
 ds_put_format(&match, "inport == %s && "
-  "ip4.src == %s && ip4.dst == %s",
-  op->json_key, nat->logical_ip, nat2->external_ip);
+  "ip%s.src == %s && ip%s.dst == %s",
+  op->json_key,
+  family == AF_INET ? "4" : "6",
+  nat->logical_ip,
+  family == AF_INET ? "4" : "6",
+  nat2->external_ip);
 ds_put_format(&actions, "outport = %s; "
   "eth.src = %s; eth.dst = %s; "
-  "reg0 = ip4.dst; reg1 = %s; "
+  "%sreg0 = ip%s.dst; %sreg1 = %s; "
   REGBIT_DISTRIBUTED_NAT" = 1; "
   REGBIT_NAT_REDIRECT" = 0; next;",
   op->od->l3dgw_port->json_key,
   op->od->l3dgw_port->lrp_networks.ea_s,
-  nat2->external_mac, nat->external_ip);
+  nat2->external_mac,
+  family == AF_INET ? "" : "xx",
+  family == AF_INET ? "4" : "6",
+  family == AF_INET ? "" : "xx",
+  nat->external_ip);
 ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, 400,
   ds_cstr(&match), ds_cstr(&actions));
 ds_clear(&match);
@@ -6298,7 +6355,8 @@ op_put

[ovs-dev] [PATCH ovn 1/6] northd: Fix table ID for IPv6 router ingress.

2019-10-29 Thread Russell Bryant
I noticed that this table number was outdated.  This is now table 3.
There are a few other sections of code for this table that were all
correctly referencing table 3.

Signed-off-by: Russell Bryant 
---
 northd/ovn-northd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 194e4bf4a..ae81a6944 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -7064,7 +7064,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 free(snat_ips);
 }
 
-/* Logical router ingress table 1: IP Input for IPv6. */
+/* Logical router ingress table 3: IP Input for IPv6. */
 HMAP_FOR_EACH (op, key_node, ports) {
 if (!op->nbrp) {
 continue;
-- 
2.21.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH ovn 2/6] actions: Add IPv6 support to lflow NAT actions

2019-10-29 Thread Russell Bryant
Signed-off-by: Russell Bryant 
---
 include/ovn/actions.h |  6 +-
 lib/actions.c | 35 +++
 tests/ovn.at  | 18 --
 utilities/ovn-trace.c | 14 +-
 4 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/include/ovn/actions.h b/include/ovn/actions.h
index 4e2f4d28d..f4997e9c9 100644
--- a/include/ovn/actions.h
+++ b/include/ovn/actions.h
@@ -225,7 +225,11 @@ struct ovnact_ct_commit {
 /* OVNACT_CT_DNAT, OVNACT_CT_SNAT. */
 struct ovnact_ct_nat {
 struct ovnact ovnact;
-ovs_be32 ip;
+int family;
+union {
+struct in6_addr ipv6;
+ovs_be32 ipv4;
+};
 uint8_t ltable; /* Logical table ID of next table. */
 };
 
diff --git a/lib/actions.c b/lib/actions.c
index c8c9cc5fd..a999a4fda 100644
--- a/lib/actions.c
+++ b/lib/actions.c
@@ -755,11 +755,18 @@ parse_ct_nat(struct action_context *ctx, const char *name,
 
 if (lexer_match(ctx->lexer, LEX_T_LPAREN)) {
 if (ctx->lexer->token.type != LEX_T_INTEGER
-|| ctx->lexer->token.format != LEX_F_IPV4) {
-lexer_syntax_error(ctx->lexer, "expecting IPv4 address");
+|| (ctx->lexer->token.format != LEX_F_IPV4
+&& ctx->lexer->token.format != LEX_F_IPV6)) {
+lexer_syntax_error(ctx->lexer, "expecting IPv4 or IPv6 address");
 return;
 }
-cn->ip = ctx->lexer->token.value.ipv4;
+if (ctx->lexer->token.format == LEX_F_IPV4) {
+cn->family = AF_INET;
+cn->ipv4 = ctx->lexer->token.value.ipv4;
+} else if (ctx->lexer->token.format == LEX_F_IPV6) {
+cn->family = AF_INET6;
+cn->ipv6 = ctx->lexer->token.value.ipv6;
+}
 lexer_get(ctx->lexer);
 
 if (!lexer_force_match(ctx->lexer, LEX_T_RPAREN)) {
@@ -784,8 +791,12 @@ static void
 format_ct_nat(const struct ovnact_ct_nat *cn, const char *name, struct ds *s)
 {
 ds_put_cstr(s, name);
-if (cn->ip) {
-ds_put_format(s, "("IP_FMT")", IP_ARGS(cn->ip));
+if (cn->family == AF_INET) {
+ds_put_format(s, "("IP_FMT")", IP_ARGS(cn->ipv4));
+} else if (cn->family == AF_INET6) {
+ds_put_char(s, '(');
+ipv6_format_addr(&cn->ipv6, s);
+ds_put_char(s, ')');
 }
 ds_put_char(s, ';');
 }
@@ -831,9 +842,17 @@ encode_ct_nat(const struct ovnact_ct_nat *cn,
 nat->flags = 0;
 nat->range_af = AF_UNSPEC;
 
-if (cn->ip) {
+if (cn->family == AF_INET) {
 nat->range_af = AF_INET;
-nat->range.addr.ipv4.min = cn->ip;
+nat->range.addr.ipv4.min = cn->ipv4;
+if (snat) {
+nat->flags |= NX_NAT_F_SRC;
+} else {
+nat->flags |= NX_NAT_F_DST;
+}
+} else if (cn->family == AF_INET6) {
+nat->range_af = AF_INET6;
+nat->range.addr.ipv6.min = cn->ipv6;
 if (snat) {
 nat->flags |= NX_NAT_F_SRC;
 } else {
@@ -843,7 +862,7 @@ encode_ct_nat(const struct ovnact_ct_nat *cn,
 
 ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset);
 ct = ofpacts->header;
-if (cn->ip) {
+if (cn->family == AF_INET || cn->family == AF_INET6) {
 ct->flags |= NX_CT_F_COMMIT;
 }
 ofpact_finish(ofpacts, &ct->ofpact);
diff --git a/tests/ovn.at b/tests/ovn.at
index 9f06059fa..d78689d86 100644
--- a/tests/ovn.at
+++ b/tests/ovn.at
@@ -1043,15 +1043,18 @@ ct_dnat;
 ct_dnat(192.168.1.2);
 encodes as 
ct(commit,table=19,zone=NXM_NX_REG11[0..15],nat(dst=192.168.1.2))
 has prereqs ip
+ct_dnat(fd11::2);
+encodes as ct(commit,table=19,zone=NXM_NX_REG11[0..15],nat(dst=fd11::2))
+has prereqs ip
 
 ct_dnat(192.168.1.2, 192.168.1.3);
 Syntax error at `,' expecting `)'.
 ct_dnat(foo);
-Syntax error at `foo' expecting IPv4 address.
+Syntax error at `foo' expecting IPv4 or IPv6 address.
 ct_dnat(foo, bar);
-Syntax error at `foo' expecting IPv4 address.
+Syntax error at `foo' expecting IPv4 or IPv6 address.
 ct_dnat();
-Syntax error at `)' expecting IPv4 address.
+Syntax error at `)' expecting IPv4 or IPv6 address.
 
 # ct_snat
 ct_snat;
@@ -1060,15 +1063,18 @@ ct_snat;
 ct_snat(192.168.1.2);
 encodes as 
ct(commit,table=19,zone=NXM_NX_REG12[0..15],nat(src=192.168.1.2))
 has prereqs ip
+ct_snat(fd11::2);
+encodes as ct(commit,table=19,zone=NXM_NX_REG12[0..15],nat(src=fd11::2))
+has prereqs ip
 
 ct_snat(192.168.1.2, 192.168.1.3);
 Syntax error at `,' expecting `)'.
 ct_snat(foo);
-Syntax error at `foo' expecting IPv4 address.
+Syntax error at `foo' expecting IPv4 or IPv6 address.
 ct_snat(foo, bar);
-Syntax error at `foo' expecting IPv4 address.
+Syntax error at `foo' expecting IPv4 or IPv6 address.
 ct_snat();
-Syntax error at `)' expecting IPv4 address.
+Syntax error at `)' expecting IPv4 or IPv6 address.
 
 # ct_clear
 ct_clear;
diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c
index c95ac

[ovs-dev] [PATCH ovn 0/6] Add IPv6 NAT support

2019-10-29 Thread Russell Bryant
This came up with reviewing the usage of ovn-kubernetes with IPv6.
It's more straight forward to start with using IPv6 NAT, matching the
IPv4 network topology with Kubernetes.  Eventually, we'd want to allow
routable IPv6 addresses everywhere, but this gives us the option of
using NAT where it's helpful.

[PATCH 1/6] northd: Fix table ID for IPv6 router ingress.
[PATCH 2/6] actions: Add IPv6 support to lflow NAT actions
[PATCH 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added
[PATCH 4/6] northd: Add lflows for IPv6 NAT.
[PATCH 5/6] system-ovn: Add IPv6 NAT test cases
[PATCH 6/6] NEWS: Add IPv6 NAT support

 NEWS  |6 
 include/ovn/actions.h |6 
 lib/actions.c |   35 +-
 northd/ovn-northd.c   |  378 -
 tests/ovn-nbctl.at|   41 +-
 tests/ovn.at  |   18 -
 tests/system-ovn.at   |  862 +-
 utilities/ovn-nbctl.c |   48 ++
 utilities/ovn-trace.c |   14 
 9 files changed, 1266 insertions(+), 142 deletions(-)

--
Russell Bryant

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn 18/19] ovn-ctl: Support commands for interconnection.

2019-10-29 Thread aginwala
Thanks Han for the patches. OVN-ctl doesn't seem to accept inb/sb args.

On Sun, Oct 20, 2019 at 5:55 PM Han Zhou  wrote:

> Add support for managing IC-NB and IC-SB DBs, and ovn-ic daemon.
>
> Signed-off-by: Han Zhou 
> ---
>  utilities/ovn-ctl   | 362
> +++-
>  utilities/ovn-ctl.8.xml |  91 
>  2 files changed, 452 insertions(+), 1 deletion(-)
>
> diff --git a/utilities/ovn-ctl b/utilities/ovn-ctl
> index 2e4e773..f8d23a3 100755
> --- a/utilities/ovn-ctl
> +++ b/utilities/ovn-ctl
> @@ -33,6 +33,9 @@ done
>  ovnnb_active_conf_file="$ovn_etcdir/ovnnb-active.conf"
>  ovnsb_active_conf_file="$ovn_etcdir/ovnsb-active.conf"
>  ovn_northd_db_conf_file="$ovn_etcdir/ovn-northd-db-params.conf"
> +ovninb_active_conf_file="$ovn_etcdir/ovninb-active.conf"
> +ovnisb_active_conf_file="$ovn_etcdir/ovnisb-active.conf"
> +ovn_ic_db_conf_file="$ovn_etcdir/ovn-ic-db-params.conf"
>  ## - ##
>  ## start ##
>  ## - ##
> @@ -61,6 +64,19 @@ stop_ovsdb () {
>  stop_sb_ovsdb
>  }
>
> +stop_inb_ovsdb() {
> +stop_xx_ovsdb $DB_INB_PID ovninb_db.ctl
> +}
> +
> +stop_isb_ovsdb() {
> +stop_xx_ovsdb $DB_ISB_PID ovnisb_db.ctl
> +}
> +
> +stop_ic_ovsdb () {
> +stop_inb_ovsdb
> +stop_isb_ovsdb
> +}
> +
>  demote_xx_ovsdb () {
>  local sync_from_addr=$1
>  local sync_from_proto=$2
> @@ -91,6 +107,16 @@ demote_ovnsb() {
>  $DB_SB_SYNC_FROM_PORT $ovnsb_active_conf_file
> ovnsb_db.ctl
>  }
>
> +demote_ovninb() {
> +demote_xx_ovsdb $DB_INB_SYNC_FROM_ADDR $DB_INB_SYNC_FROM_PROTO \
> +$DB_INB_SYNC_FROM_PORT $ovninb_active_conf_file
> ovninb_db.ctl
> +}
> +
> +demote_ovnisb() {
> +demote_xx_ovsdb $DB_ISB_SYNC_FROM_ADDR $DB_ISB_SYNC_FROM_PROTO \
> +$DB_ISB_SYNC_FROM_PORT $ovnisb_active_conf_file
> ovnisb_db.ctl
> +}
> +
>  promote_xx_ovsdb() {
>  local active_conf_file=$1
>  local ctl_file=$2
> @@ -106,6 +132,14 @@ promote_ovnsb() {
>  promote_xx_ovsdb $ovnsb_active_conf_file ovnsb_db.ctl
>  }
>
> +promote_ovninb() {
> +promote_xx_ovsdb $ovninb_active_conf_file ovninb_db.ctl
> +}
> +
> +promote_ovnisb() {
> +promote_xx_ovsdb $ovnisb_active_conf_file ovnisb_db.ctl
> +}
> +
>  start_ovsdb__() {
>  local DB=$1 db=$2 schema_name=$3 table_name=$4
>  local db_pid_file
> @@ -284,6 +318,19 @@ start_ovsdb () {
>  start_sb_ovsdb
>  }
>
> +start_inb_ovsdb() {
> +start_ovsdb__ INB inb OVN_IC_Northbound INB_Global
> +}
> +
> +start_isb_ovsdb() {
> +start_ovsdb__ ISB isb OVN_IC_Southbound ISB_Global
> +}
> +
> +start_ic_ovsdb () {
> +start_inb_ovsdb
> +start_isb_ovsdb
> +}
> +
>  sync_status() {
>  ovn-appctl -t $OVN_RUNDIR/ovn${1}_db.ctl ovsdb-server/sync-status |
> awk '{if(NR==1) print $2}'
>  }
> @@ -318,6 +365,36 @@ status_ovsdb () {
>fi
>  }
>
> +status_ovninb() {
> +if ! pidfile_is_running $DB_INB_PID; then
> +echo "not-running"
> +else
> +echo "running/$(sync_status inb)"
> +fi
> +}
> +
> +status_ovnisb() {
> +if ! pidfile_is_running $DB_ISB_PID; then
> +echo "not-running"
> +else
> +echo "running/$(sync_status isb)"
> +fi
> +}
> +
> +status_ic_ovsdb () {
> +  if ! pidfile_is_running $DB_INB_PID; then
> +  log_success_msg "OVN IC-Northbound DB is not running"
> +  else
> +  log_success_msg "OVN IC-Northbound DB is running"
> +  fi
> +
> +  if ! pidfile_is_running $DB_ISB_PID; then
> +  log_success_msg "OVN IC-Southbound DB is not running"
> +  else
> +  log_success_msg "OVN IC-Southbound DB is running"
> +  fi
> +}
> +
>  run_nb_ovsdb() {
>  DB_NB_DETACH=no
>  start_nb_ovsdb
> @@ -328,6 +405,16 @@ run_sb_ovsdb() {
>  start_sb_ovsdb
>  }
>
> +run_inb_ovsdb() {
> +DB_INB_DETACH=no
> +start_inb_ovsdb
> +}
> +
> +run_isb_ovsdb() {
> +DB_ISB_DETACH=no
> +start_isb_ovsdb
> +}
> +
>  start_northd () {
>  if [ ! -e $ovn_northd_db_conf_file ]; then
>  if test X"$OVN_MANAGE_OVSDB" = Xyes; then
> @@ -373,6 +460,41 @@ start_northd () {
>  fi
>  }
>
> +start_ic () {
> +if [ ! -e $ovn_ic_db_conf_file ]; then
> +ovn_ic_params="--ovnnb-db=$OVN_NORTHD_NB_DB \
> +   --ovnsb-db=$OVN_NORTHD_SB_DB \
> +   --ovninb-db=$OVN_IC_NB_DB \
> +   --ovnisb-db=$OVN_IC_SB_DB"
> +else
> +ovn_ic_params="`cat $ovn_ic_db_conf_file`"
> +fi
> +
> +if daemon_is_running ovn-ic; then
> +log_success_msg "ovn-ic is already running"
> +else
> +set ovn-ic
> +if test X"$OVN_IC_LOGFILE" != X; then
> +set "$@" --log-file=$OVN_IC_LOGFILE
> +fi
> +if test X"$OVN_IC_SSL_KEY" != X; then
> +set "$@" --private-key=$OVN_IC_SSL_KEY
> +fi
> +if test X"$OVN_IC_SSL_CERT" != X; then
> +set "$@" --certificate=$OVN_IC_SSL_CERT
> +fi
> +if test X"$OVN_IC_SSL_CA_CERT" != X; then
> +  

Re: [ovs-dev] [PATCH ovn 10/19] ovn-ic: Interconnection controller with AZ registeration.

2019-10-29 Thread aginwala
Thanks Han for the patches.

On Sun, Oct 20, 2019 at 5:54 PM Han Zhou  wrote:

> This patch introduces interconnection controller, ovn-ic, and
> implements the basic AZ registration feature: taking the AZ
> name from NB DB and create an Availability_Zone entry in
> IC-SB DB.
>
> Signed-off-by: Han Zhou 
> ---
>  Makefile.am  |   1 +
>  ic/.gitignore|   2 +
>  ic/automake.mk   |  10 ++
>  ic/ovn-ic.8.xml  | 111 ++
>  ic/ovn-ic.c  | 413
> +++
>  ovn-nb.ovsschema |   5 +-
>  ovn-nb.xml   |   7 +
>  tests/automake.mk|   4 +-
>  tests/ovn-ic.at  |  29 
>  tests/ovn-macros.at  | 161 
>  tests/testsuite.at   |   1 +
>  tutorial/ovs-sandbox |  78 +-
>  12 files changed, 792 insertions(+), 30 deletions(-)
>  create mode 100644 ic/.gitignore
>  create mode 100644 ic/automake.mk
>  create mode 100644 ic/ovn-ic.8.xml
>  create mode 100644 ic/ovn-ic.c
>  create mode 100644 tests/ovn-ic.at
>
> diff --git a/Makefile.am b/Makefile.am
> index 33c18c5..d22a220 100644
> --- a/Makefile.am
> +++ b/Makefile.am
> @@ -500,4 +500,5 @@ include selinux/automake.mk
>  include controller/automake.mk
>  include controller-vtep/automake.mk
>  include northd/automake.mk
> +include ic/automake.mk
>  include build-aux/automake.mk
> diff --git a/ic/.gitignore b/ic/.gitignore
> new file mode 100644
> index 000..1b73eb4
> --- /dev/null
> +++ b/ic/.gitignore
> @@ -0,0 +1,2 @@
> +/ovn-ic
> +/ovn-ic.8
> diff --git a/ic/automake.mk b/ic/automake.mk
> new file mode 100644
> index 000..8e71bc3
> --- /dev/null
> +++ b/ic/automake.mk
> @@ -0,0 +1,10 @@
> +# ovn-ic
> +bin_PROGRAMS += ic/ovn-ic
> +ic_ovn_ic_SOURCES = ic/ovn-ic.c
> +ic_ovn_ic_LDADD = \
> +   lib/libovn.la \
> +   $(OVSDB_LIBDIR)/libovsdb.la \
> +   $(OVS_LIBDIR)/libopenvswitch.la
> +man_MANS += ic/ovn-ic.8
> +EXTRA_DIST += ic/ovn-ic.8.xml
> +CLEANFILES += ic/ovn-ic.8
> diff --git a/ic/ovn-ic.8.xml b/ic/ovn-ic.8.xml
> new file mode 100644
> index 000..00f33aa
> --- /dev/null
> +++ b/ic/ovn-ic.8.xml
> @@ -0,0 +1,111 @@
> +
> +
> +Name
> +ovn-ic -- Open Virtual Network interconnection controller
> +
> +Synopsis
> +ovn-ic [options]
> +
> +Description
> +
> +  ovn-ic, OVN interconnection controller, is a
> centralized
> +  daemon which communicates with global interconnection databases
> INB/ISB
> +  to configure and exchange data with local NB/SB for interconnecting
> +  with other OVN deployments.
> +
> +
> +Options
> +
> +  --ovnnb-db=database
> +  
> +The OVSDB database containing the OVN Northbound Database.  If the
> +OVN_NB_DB environment variable is set, its value is
> used
> +as the default.  Otherwise, the default is
> +unix:@RUNDIR@/ovnnb_db.sock.
> +  
> +  --ovnsb-db=database
> +  
> +The OVSDB database containing the OVN Southbound Database.  If the
> +OVN_SB_DB environment variable is set, its value is
> used
> +as the default.  Otherwise, the default is
> +unix:@RUNDIR@/ovnsb_db.sock.
> +  
> +  --ovninb-db=database
> +  
> +The OVSDB database containing the OVN Interconnection Northbound
> +Database.  If the OVN_INB_DB environment variable is
> set,
> +its value is used as the default.  Otherwise, the default is
> +unix:@RUNDIR@/ovninb_db.sock.
> +  
> +  --ovnisb-db=database
> +  
> +The OVSDB database containing the OVN Interconnection Southbound
> +Database.  If the OVN_ISB_DB environment variable is
> set,
> +its value is used as the default.  Otherwise, the default is
> +unix:@RUNDIR@/ovnisb_db.sock.
> +  
> +
> +
> +  database in the above options must be an OVSDB active or
> +  passive connection method, as described in ovsdb(7).
> +
> +
> +Daemon Options
> +http://www.w3.org/2003/XInclude"/>
> +
> +Logging Options
> +http://www.w3.org/2003/XInclude"/>
> +
> +PKI Options
> +
> +  PKI configuration is required in order to use SSL for the
> connections to
> +  the Northbound and Southbound databases.
> +
> +http://www.w3.org/2003/XInclude"/>
> +
> +Other Options
> + + xmlns:xi="http://www.w3.org/2003/XInclude"/>
> +
> + + xmlns:xi="http://www.w3.org/2003/XInclude"/>
> +
> +Runtime Management Commands
> +
> +  ovs-appctl can send commands to a running
> +  ovn-ic process.  The currently supported commands
> +  are described below.
> +  
> +  exit
> +  
> +Causes ovn-ic to gracefully terminate.
> +  
> +
> +  pause
> +  
> +Pauses the ovn-ic operation from processing any Northbound and
> +Southbound database changes.
> +  
> +
> +  resume
> +  
> +Resumes the ovn-ic operation to process Northbound and
> +So

Re: [ovs-dev] [PATCH ovn 19/19] tutorial: Add tutorial for OVN Interconnection.

2019-10-29 Thread aginwala
Thanks Han for the correction. Just one more minor typo in the tutorial
below.

I hit some roadblocks to start ic controller on different az but got my
setup running with workarounds in current code and have posted in comments
which needs fix for sure. I tried with 2 different ovn setups with 2 AZs
where each ovn az uses1 hv, 1 gw and 1 lport bound to hv. I am able to
access the workloads across azs.


Tested-by: Aliasgar Ginwala >



On Sun, Oct 20, 2019 at 5:55 PM Han Zhou  wrote:

> Added tutorial, and also updated NEWS and TODO.
>
> Signed-off-by: Han Zhou 
> ---
>  Documentation/automake.mk   |   1 +
>  Documentation/tutorials/index.rst   |   1 +
>  Documentation/tutorials/ovn-interconnection.rst | 181
> 
>  NEWS|   5 +
>  TODO.rst|  10 ++
>  5 files changed, 198 insertions(+)
>  create mode 100644 Documentation/tutorials/ovn-interconnection.rst
>
> diff --git a/Documentation/automake.mk b/Documentation/automake.mk
> index 5968d69..15d261d 100644
> --- a/Documentation/automake.mk
> +++ b/Documentation/automake.mk
> @@ -20,6 +20,7 @@ DOC_SOURCE = \
> Documentation/tutorials/ovn-sandbox.rst \
> Documentation/tutorials/ovn-ipsec.rst \
> Documentation/tutorials/ovn-rbac.rst \
> +   Documentation/tutorials/ovn-interconnection.rst \
> Documentation/topics/index.rst \
> Documentation/topics/testing.rst \
> Documentation/topics/high-availability.rst \
> diff --git a/Documentation/tutorials/index.rst
> b/Documentation/tutorials/index.rst
> index 1cf083e..4ff6e16 100644
> --- a/Documentation/tutorials/index.rst
> +++ b/Documentation/tutorials/index.rst
> @@ -43,3 +43,4 @@ vSwitch.
> ovn-openstack
> ovn-rbac
> ovn-ipsec
> +   ovn-interconnection
> diff --git a/Documentation/tutorials/ovn-interconnection.rst
> b/Documentation/tutorials/ovn-interconnection.rst
> new file mode 100644
> index 000..1320d41
> --- /dev/null
> +++ b/Documentation/tutorials/ovn-interconnection.rst
> @@ -0,0 +1,181 @@
> +..
> +  Licensed under the Apache License, Version 2.0 (the "License"); you
> may
> +  not use this file except in compliance with the License. You may
> obtain
> +  a copy of the License at
> +
> +  http://www.apache.org/licenses/LICENSE-2.0
> +
> +  Unless required by applicable law or agreed to in writing, software
> +  distributed under the License is distributed on an "AS IS" BASIS,
> WITHOUT
> +  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> See the
> +  License for the specific language governing permissions and
> limitations
> +  under the License.
> +
> +  Convention for heading levels in OVN documentation:
> +
> +  ===  Heading 0 (reserved for the title in a document)
> +  ---  Heading 1
> +  ~~~  Heading 2
> +  +++  Heading 3
> +  '''  Heading 4
> +
> +  Avoid deeper levels because they do not render well.
> +
> +===
> +OVN Interconnection
> +===
> +
> +This document provides a guide for interconnecting multiple OVN
> deployements
> +with OVN managed tunneling.  More details about the OVN Interconnectiong
> design
> +can be found in ``ovn-architecture``\(7) manpage.
> +
> +This document assumes two or more OVN deployments are setup and runs
> normally,
> +possibly at different data-centers, and the gateway chassises of each OVN
> +are with IP addresses that are reachable between each other.
> +
> +Setup Interconnection Databases
> +---
> +
> +To interconnect different OVNs, you need to create global OVSDB databases
> that
> +store interconnection data.  The databases can be setup on any nodes that
> are
> +accessible from all the central nodes of each OVN deployment.  It is
> +recommended that the global databases are setup with HA, with nodes in
> +different avaialbility zones, to avoid single point of failure.
> +
> +1. Install OVN packages on each global database node.
> +
> +2. Start OVN IC-NB and IC-SB databases.
> +
> +   On each global database node ::
> +
> +$ ovn-ctl [options] start_ic_ovsdb
> +
> +   Options depends on the HA mode you use.  See details with ::
> +
> +$ ovn-ctl --help.
> +
> +Register OVN to Interconnection Databases
> +-
> +
> +For each OVN deployment, set an availability zone name ::
> +
> +$ ovn-nbctl set NB_Global . name=
> +
> +The name should be unique across all OVN deployments, e.g. ovn-east,
> +ovn-west, etc.
> +
> +For each OVN deployment, start the ``ovn-ic`` daemon on central nodes ::
> +
> +$ ovn-ctl --ovninb-db= --ovnisb-db= \
> +  --ovnnb-db= --ovnsb-db= [more options] start_ic
> +
> +An example of  is ``tcp::6645``, or for
> +clustered DB: ``tcp::6645,tcp::6645,tcp::6645``.
> + is similar, but usually with a different port nu

Re: [ovs-dev] [PATCH v3 1/2 ovn] OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless

2019-10-29 Thread 0-day Robot
Bleep bloop.  Greetings Ankur Sharma, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 220 characters long (recommended limit is 79)
#121 FILE: utilities/ovn-nbctl.8.xml:668:
  [--may-exist] 
[--stateless]lr-nat-add router 
type external_ip logical_ip 
[logical_port external_mac]

Lines checked: 220, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] New PO

2019-10-29 Thread Sui
Good Day,

Please do check our new order list and send quote us.

Get back to us with best price confirmation.


Thanks Regards,

Sui
Purchase Manager,___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3 2/2 ovn] OVN: Use ip4.src and ip4.dst actions for NAT rules

2019-10-29 Thread Ankur Sharma
For dnat_and_snat rules which are meant to be stateless
instead of using ct_snat/dnat OVN actions, we will use
ip4.src/ip4.dst.

This actions will do 1:1 mapping to inner ip to external ip,
while recalculating the checksums.

Signed-off-by: Ankur Sharma 
---
 northd/ovn-northd.8.xml |  34 --
 northd/ovn-northd.c |  86 +++--
 tests/ovn-northd.at |  57 +
 tests/ovn.at| 311 
 4 files changed, 472 insertions(+), 16 deletions(-)

diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml
index d3e0e5e..87a6909 100644
--- a/northd/ovn-northd.8.xml
+++ b/northd/ovn-northd.8.xml
@@ -1807,7 +1807,9 @@ icmp6 {
   to change the source IP address of a packet from A to
   B, a priority-90 flow matches ip &&
   ip4.dst == B with an action
-  ct_snat; .
+  ct_snat; . If the NAT rule is of type dnat_and_snat
+  and has stateless=true in the options, then the action
+  would be ip4.dst=(B).
 
 
 
@@ -1827,7 +1829,10 @@ icmp6 {
   B, a priority-100 flow matches ip &&
   ip4.dst == B && inport == GW,
   where GW is the logical router gateway port, with an
-  action ct_snat;.
+  action ct_snat;. If the NAT rule is of type
+  dnat_and_snat and has stateless=true in the
+  options, then the action would be ip4.dst=
+  (B).
 
 
 
@@ -1947,7 +1952,10 @@ icmp6 {
 Gateway router is configured to force SNAT any DNATed packet,
 the above action will be replaced by
 flags.force_snat_for_dnat = 1; flags.loopback = 1;
-ct_dnat(B);.
+ct_dnat(B);. If the NAT rule is of type
+dnat_and_snat and has stateless=true in the
+options, then the action would be ip4.dst=
+(B).
   
 
   
@@ -1979,7 +1987,10 @@ icmp6 {
   B, a priority-100 flow matches ip &&
   ip4.dst == B && inport == GW,
   where GW is the logical router gateway port, with an
-  action ct_dnat(B);.
+  action ct_dnat(B);. If the NAT rule is of
+  type dnat_and_snat and has stateless=true in the
+  options, then the action would be ip4.dst=
+  (B).
 
 
 
@@ -2653,7 +2664,10 @@ nd_ns {
   matches ip && ip4.src == B
   && outport == GW, where GW
   is the logical router gateway port, with an action
-  ct_dnat;.
+  ct_dnat;. If the NAT rule is of type
+  dnat_and_snat and has stateless=true in the
+  options, then the action would be ip4.src=
+  (B).
 
 
 
@@ -2711,7 +2725,10 @@ nd_ns {
   ip && ip4.src == A with an action
   ct_snat(B);.  The priority of the flow
   is calculated based on the mask of A, with matches
-  having larger masks getting higher priorities.
+  having larger masks getting higher priorities. If the NAT rule is
+  of type dnat_and_snat and has stateless=true in the
+  options, then the action would be ip4.src=
+  (B).
 
 
   A priority-0 logical flow with match 1 has actions
@@ -2734,7 +2751,10 @@ nd_ns {
   logical router gateway port, with an action
   ct_snat(B);.  The priority of the flow
   is calculated based on the mask of A, with matches
-  having larger masks getting higher priorities.
+  having larger masks getting higher priorities. If the NAT rule
+  is of type dnat_and_snat and has stateless=true
+  in the options, then the action would be ip4.src=
+  (B).
 
 
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 194e4bf..4431659 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -6500,6 +6500,18 @@ copy_ra_to_sb(struct ovn_port *op, const char 
*address_mode)
 smap_destroy(&options);
 }
 
+static inline bool
+lrouter_nat_is_stateless(const struct nbrec_nat *nat)
+{
+const char *stateless = smap_get(&nat->options, "stateless");
+
+if (stateless && !strcmp(stateless, "true")) {
+return true;
+}
+
+return false;
+}
+
 static void
 build_lrouter_flows(struct hmap *datapaths, struct hmap *ports,
 struct hmap *lflows, struct shash *meter_groups)
@@ -7261,6 +7273,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 nat = od->nbr->nat[i];
 
 ovs_be32 ip, mask;
+bool stateless = lrouter_nat_is_stateless(nat);
 
 char *error = ip_parse_masked(nat->external_ip, &ip, &mask);
 if (error || mask != OVS_BE32_MAX) {
@@ -7326,15 +7339,26 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap 
*ports,
 if (!od->l3dgw_port) {
 /* Gateway router. */
 ds_clear(&match);
+ds_clear(&actions);
 ds_put

[ovs-dev] [PATCH v3 1/2 ovn] OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless

2019-10-29 Thread Ankur Sharma
Adding ovn-nbctl to mark a dnat_and_snat rule as stateless.
This configuration will added to "options" column of NAT table.

Signed-off-by: Ankur Sharma 
---
 ovn-nb.ovsschema  |  6 --
 ovn-nb.xml|  5 +
 tests/ovn-nbctl.at| 29 +
 utilities/ovn-nbctl.8.xml | 12 +++-
 utilities/ovn-nbctl.c | 30 +-
 5 files changed, 78 insertions(+), 4 deletions(-)

diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema
index 2c87cbb..084305b 100644
--- a/ovn-nb.ovsschema
+++ b/ovn-nb.ovsschema
@@ -1,7 +1,7 @@
 {
 "name": "OVN_Northbound",
-"version": "5.16.0",
-"cksum": "923459061 23095",
+"version": "5.17.0",
+"cksum": "1128988054 23237",
 "tables": {
 "NB_Global": {
 "columns": {
@@ -345,6 +345,8 @@
  "snat",
  "dnat_and_snat"
]]}}},
+"options": {"type": {"key": "string", "value": "string",
+ "min": 0, "max": "unlimited"}},
 "external_ids": {
 "type": {"key": "string", "value": "string",
  "min": 0, "max": "unlimited"}}},
diff --git a/ovn-nb.xml b/ovn-nb.xml
index 8990894..d8f3237 100644
--- a/ovn-nb.xml
+++ b/ovn-nb.xml
@@ -2297,6 +2297,11 @@
   
 
 
+
+  Indicates if a dnat_and_snat rule should lead to connection
+  tracking state or not.
+
+
 
   
 See External IDs at the beginning of this document.
diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at
index 01091dd..acbe73c 100644
--- a/tests/ovn-nbctl.at
+++ b/tests/ovn-nbctl.at
@@ -516,6 +516,31 @@ dnat_and_snat30.0.0.2   192.168.1.3
 snat 30.0.0.1   192.168.1.0/24
 ])
 
+AT_CHECK([ovn-nbctl --bare --columns=options list nat | grep stateless=true| 
wc -l], [0],
+[0
+])
+AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat_and_snat 40.0.0.2 
192.168.1.4])
+AT_CHECK([ovn-nbctl --bare --columns=options list nat | grep stateless=true| 
wc -l], [0],
+[1
+])
+AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat 40.0.0.2 192.168.1.4], 
[1], [],
+[ovn-nbctl: stateless is not applicable to dnat or snat types
+])
+AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 snat 40.0.0.2 192.168.1.4], 
[1], [],
+[ovn-nbctl: stateless is not applicable to dnat or snat types
+])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 40.0.0.2 192.168.1.5], [1], [],
+[ovn-nbctl: 40.0.0.2, 192.168.1.5: External ip cannot be shared across 
stateless and stateful NATs
+])
+AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 40.0.0.2 192.168.1.5], [1], [],
+[ovn-nbctl: 40.0.0.2, 192.168.1.5: External ip cannot be shared across 
stateless and stateful NATs
+])
+
+AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 40.0.0.3 192.168.1.6])
+AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat_and_snat 40.0.0.3 
192.168.1.7], [1], [],
+[ovn-nbctl: 40.0.0.3, 192.168.1.7: External ip cannot be shared across 
stateless and stateful NATs
+])
+
 dnl Deletes the NATs
 AT_CHECK([ovn-nbctl lr-nat-del lr0 dnat_and_snat 30.0.0.3], [1], [],
 [ovn-nbctl: no matching NAT with the type (dnat_and_snat) and external_ip 
(30.0.0.3)
@@ -533,14 +558,18 @@ AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
 TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC 
LOGICAL_PORT
 dnat 30.0.0.1   192.168.1.2
 dnat_and_snat30.0.0.2   192.168.1.3
+dnat_and_snat40.0.0.2   192.168.1.4
 snat 30.0.0.1   192.168.1.0/24
+snat 40.0.0.3   192.168.1.6
 ])
 
 AT_CHECK([ovn-nbctl lr-nat-del lr0 dnat])
 AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl
 TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC 
LOGICAL_PORT
 dnat_and_snat30.0.0.2   192.168.1.3
+dnat_and_snat40.0.0.2   192.168.1.4
 snat 30.0.0.1   192.168.1.0/24
+snat 40.0.0.3   192.168.1.6
 ])
 
 AT_CHECK([ovn-nbctl lr-nat-del lr0])
diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml
index fd75c0e..2161d8c 100644
--- a/utilities/ovn-nbctl.8.xml
+++ b/utilities/ovn-nbctl.8.xml
@@ -665,7 +665,7 @@
 NAT Commands
 
 
-  [--may-exist] lr-nat-add router 
type external_ip logical_ip 
[logical_port external_mac]
+  [--may-exist] 
[--stateless]lr-nat-add router 
type external_ip logical_ip 
[logical_port external_mac]
   
 
   Adds the specified NAT to router.
@@ -681,8 +681,18 @@
   The logical_port is the name of an existing logical
   switch port where the logical_ip resides.
   The external_mac is an Ethernet address.
+  The --stateless
 
 
+  When --stateless is specified then it implies that
+  we will be not use 

[ovs-dev] [PATCH v3 0/2] ALLOW Stateless NAT operations

2019-10-29 Thread Ankur Sharma
NAT implementation in OVN uses connection tracker to replace
source and dest ips. This implementation works fine and
is the right approach for cases where external ips are shared
(i.e. SNAT) or where we replace ip only when relevant flow is there
(i.e. DNAT).

However, it opens the possibility of Dos Attack, where attacker
can easily simluate multiple 5 tuples, to consume the connection
tracker entry in an OVN chassis. This way they can easily attain
the CT limit, there by impacting the usage of it by other features
like valid NAT, ACL etc.

This attack is even worse, when external ip is a public ip,
i.e internet routable ip.

In this patch we are introducing an option with NAT table entry.
Option "stateless=true" indicates that NAT implmentation
should not be using CT, i.e it should not use ct_snat/dnat actions.

Instead of ct_* actions, we will use ip4.src/dst OVN actions, which
will replace source and destination ips, while recalculating the
checksums.

This option is applicable only for the NAT rules which can be
1:1 mapped between inner and external ips, i.e dnat_and_snat rule.

Signed-off-by: Ankur Sharma 

Ankur Sharma (2):
  OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless
  OVN: Use ip4.src and ip4.dst actions for NAT rules

 northd/ovn-northd.8.xml   |  34 +++--
 northd/ovn-northd.c   |  86 +++--
 ovn-nb.ovsschema  |   6 +-
 ovn-nb.xml|   5 +
 tests/ovn-nbctl.at|  29 +
 tests/ovn-northd.at   |  57 +
 tests/ovn.at  | 311 ++
 utilities/ovn-nbctl.8.xml |  12 +-
 utilities/ovn-nbctl.c |  30 -
 9 files changed, 550 insertions(+), 20 deletions(-)

-- 
1.8.3.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v2] travis: support ppc64le builds

2019-10-29 Thread David Wilder
Add support for travis-ci ppc64le builds.

- Updated matrix in .travis.yml to include a ppc64le build.
- Move package install needed for 32bit builds to .travis/linux-prepare.sh.

To keep the total build time at an acceptable level only a single build
job is included in the matrix for ppc64le.

A build report example can be found here [1]
[0] http://travis-ci.org/
[1] https://travis-ci.org/djlwilder/ovs/builds/604615966

Signed-off-by: David Wilder 
---
 .travis.yml  | 5 +++--
 .travis/linux-prepare.sh | 5 -
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5676d9748..c99f26815 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ addons:
   apt:
 packages:
   - bc
-  - gcc-multilib
   - libssl-dev
   - llvm-dev
   - libjemalloc1
@@ -24,7 +23,6 @@ addons:
   - libelf-dev
   - selinux-policy-dev
   - libunbound-dev
-  - libunbound-dev:i386
   - libunwind-dev
 
 before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
@@ -50,6 +48,9 @@ matrix:
 - os: osx
   compiler: clang
   env: OPTS="--disable-ssl"
+- os: linux-ppc64le
+  compiler: gcc
+  env: OPTS="--disable-ssl"
 
 script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS
 
diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh
index e546d32cb..299f9de99 100755
--- a/.travis/linux-prepare.sh
+++ b/.travis/linux-prepare.sh
@@ -18,5 +18,8 @@ pip install --user --upgrade docutils
 if [ "$M32" ]; then
 # 32-bit and 64-bit libunwind can not be installed at the same time.
 # This will remove the 64-bit libunwind and install 32-bit version.
-sudo apt-get install -y libunwind-dev:i386
+sudo apt-get install -y \
+ gcc-multilib \
+ libunwind-dev:i386 \
+ libunbound-dev:i386
 fi
-- 
2.23.0.162.gf1d4a28

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table

2019-10-29 Thread Pravin Shelar
On Tue, Oct 29, 2019 at 4:31 AM Tonghao Zhang  wrote:
>
> On Tue, Oct 29, 2019 at 3:38 PM Pravin Shelar  wrote:
> >
> > On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang  
> > wrote:
> > >
> > > On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar  wrote:
> > > >
> > > > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang 
> > > >  wrote:
> > > > >
> > > > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar  wrote:
> > > > > >
> > > > ...
> > > >
> > ...
> > > > >  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> > > > > @@ -400,10 +458,9 @@ static struct table_instance
> > > > > *table_instance_rehash(struct table_instance *ti,
> > > > > return new_ti;
> > > > >  }
> > > > >
> > > > > -int ovs_flow_tbl_flush(struct flow_table *flow_table)
> > > > > +int ovs_flow_tbl_flush(struct flow_table *table)
> > > > >  {
> > > > > -   struct table_instance *old_ti, *new_ti;
> > > > > -   struct table_instance *old_ufid_ti, *new_ufid_ti;
> > > > > +   struct table_instance *new_ti, *new_ufid_ti;
> > > > >
> > > > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
> > > > > if (!new_ti)
> > > > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table 
> > > > > *flow_table)
> > > > > if (!new_ufid_ti)
> > > > > goto err_free_ti;
> > > > >
> > > > > -   old_ti = ovsl_dereference(flow_table->ti);
> > > > > -   old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> > > > > +   table_instance_destroy(table, true);
> > > > >
> > > > This would destroy running table causing unnecessary flow miss. Lets
> > > > keep current scheme of setting up new table before destroying current
> > > > one.
> > > >
> > > > > -   rcu_assign_pointer(flow_table->ti, new_ti);
> > 
> > ...
> > >  /* Must be called with OVS mutex held. */
> > >  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
> > >  {
> > > @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table
> > > *table, struct sw_flow *flow)
> > > struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
> > >
> > > BUG_ON(table->count == 0);
> > > -   hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
> > > -   table->count--;
> > > -   if (ovs_identifier_is_ufid(&flow->id)) {
> > > -   hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
> > > -   table->ufid_count--;
> > > -   }
> > > -
> > > -   /* RCU delete the mask. 'flow->mask' is not NULLed, as it should 
> > > be
> > > -* accessible as long as the RCU read lock is held.
> > > -*/
> > > -   flow_mask_remove(table, flow->mask);
> > > +   table_instance_remove(table, ti, ufid_ti, flow, true);
> > >  }
> > Lets rename table_instance_remove() to imply it is freeing a flow.
> hi Pravin, the function ovs_flow_free will free the flow actually. In
> -ovs_flow_cmd_del
> ovs_flow_tbl_remove
> ...
> ovs_flow_free
>
> In -table_instance_destroy
> table_instance_remove
> ovs_flow_free
>
> But if rename the table_instance_remove, table_instance_flow_free ?
table_instance_flow_free() looks good.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Comunicación efectiva y empática del coach

2019-10-29 Thread Las 6 C’s del coaching exitoso
26 de Noviembre | Horario de 10:00 a 17:00 hrs.  |  (hora del centro de México) 

- Coaching y liderazgo efectivo. - 


¿De qué hablaremos?

Este webinar tiene como finalidad llevar a los participantes a ser capaces de 
intervenir positivamente en otros individuos en
funciones de liderazgo, comprendiendo diversos estilos y aplicando en la 
práctica acciones que permitan mejorar la toma de decisiones
para alcanzar los objetivos de su organización.

¿Qué aprenderás?:

- Diferencia entre liderazgo y dirección.
- Definiciones del Coaching.
- Beneficios del Coaching.
- Qué no es Coaching?
- Las 6 C’s del coaching exitoso.
- Técnicas de coaching de liderazgo efectivo.
- Comunicación efectiva y empática del coach.
- Habilidades del Coach.

Solicita información respondiendo a este correo con la palabra Coaching, junto 
con los siguientes datos:

Nombre:
Correo electrónico:
Número telefónico:


Dirigido a: Empresarios, gerentes, líderes de proyecto, jefes de área, 
supervisores y todo aquel que tenga contacto con el personal de
forma directa o indirecta y busque mejorar la capacidad de influir en los demás.

Números de Atención: 

(045) 55 15 54 66 30 - (045) 55 85567293 - (045) 5530167085 

En caso de que haya recibido este correo sin haberlo solicitado o si desea 
dejar de recibir nuestra promoción favor de responder con la palabra baja o 
enviar un correo a bajas@ innovalearn.net


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv6] netdev-afxdp: Add need_wakeup supprt.

2019-10-29 Thread Ilya Maximets

On 29.10.2019 0:32, William Tu wrote:

On Mon, Oct 28, 2019 at 12:11 PM Ilya Maximets  wrote:


On 28.10.2019 11:46, Eelco Chaudron wrote:



On 23 Oct 2019, at 23:06, William Tu wrote:


The patch adds support for using need_wakeup flag in AF_XDP rings.
A new option, use_need_wakeup, is added.  When this option is used,
it means that OVS has to explicitly wake up the kernel RX, using poll()
syscall and wake up TX, using sendto() syscall. This feature improves
the performance by avoiding unnecessary sendto syscalls for TX.
For RX, instead of kernel always busy-spinning on fille queue, OVS wakes
up the kernel RX processing when fill queue is replenished.

The need_wakeup feature is merged into Linux kernel bpf-next tee with commit
77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") and
OVS enables it by default, if libbpf supports it.  If users enable it but
runs in an older version of libbpf, then the need_wakeup feature has no effect,
and a warning message is logged.

For virtual interface, it's better set use_need_wakeup=false, since
the virtual device's AF_XDP xmit is synchronous: the sendto syscall
enters kernel and process the TX packet on tx queue directly.

On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port
to physical port improves from 6.1Mpps to 7.3Mpps.

Suggested-by: Ilya Maximets 
Signed-off-by: William Tu 


Reviewed based on diff from previous version, also did quick compile test with 
and without need_wakeup supported kernel.
No actual performance tests ran, as my setup is in a messed up state :(

One small issue (guess can be fixed at apply time), the subject mentions 
“supprt”, it’s missing an o.

Acked-by: Eelco Chaudron 



Thanks, I could fix the 'supprt' while applying the patch.

But I have one more question.
William, Eelco, what do you think about renaming the option itself from
"options:use_need_wakeup" to "options:use-need-wakeup"?
Most of the netdev options for netdev-dpdk and netdev-linux except few of them
(e.g. n_rxq, n_rxq_desc) uses dashes in their names instead of underscores.
I agree that right now there is a mess in OVS and there is no specified 
convention
for options naming, but it might be good idea to name all the new options in a
similar way, to keep the API more or less consistent.  What do you think?

I could squash in below diff while applying the patch (I also added a NEWS 
entry):


Hi Ilya,

Yes, I'm ok with using dashes.
diff below looks good to me. Thanks
William


Thanks, William and Eelco!
I fixed a typo in the subject line, squashed the diff and applied to master.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ] travis: support ppc64le builds

2019-10-29 Thread dwilder

On 2019-10-29 09:52, Ilya Maximets wrote:

On 28.10.2019 22:22, David Wilder wrote:

Add support for travis-ci ppc64le builds.

- Updated matrix in .travis.yml to include a ppc64le build.
- Added support to install packages needed by specific architectures.

To keep the total build time at an acceptable level only a single 
build

job is included in the matrix for ppc64le.

A build report example can be found here [1]
[0] 
https://urldefense.proofpoint.com/v2/url?u=http-3A__travis-2Dci.org_&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=UMYL8rzJNph87seC0oJLBiWoe-sUSL80AJy0RMTgBzQ&e= 
[1] 
https://urldefense.proofpoint.com/v2/url?u=https-3A__travis-2Dci.org_djlwilder_ovs_builds_604098141&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=pyd2yQpQ0snpwGE5El4RYZsatwl74sthM1KLqtIKCnY&e= 
Signed-off-by: David Wilder 


Hi David,
Thanks for working on this. I have a couple of question regarding
ppc64le support by TravisCI.  It seems that they are not supporting
this architecture officially and refusing[1] to solve any issues that
appears while using it. There also no official documentation.
It's kind of a hidden feature that some projects are using for their
own risk [2]. Do you know why this happens or maybe you have some
insights about what is going on/how it works?


Work is going on to increase ppc64le support on Travis by the end of the 
year.  I dont have any details yet. My plan is to keep this to 
build-only ci until then.  Important, ppc64le VM are only available on 
travis-ci.org, they are not available on travis-ci.com.



The API is also a bit strange because Travis started to officially
support arm builds, but this is done via 'arch' knob, not the 'os'.
Will it be changed over time for ppc64le?



Sorry, I dont know.


[1]
https://urldefense.proofpoint.com/v2/url?u=https-3A__travis-2Dci.community_t_ppc64le-2Darch-2Dsupport-2Don-2Dtravis-2Dci-2Dcom-2Dvs-2Dtravis-2Dci-2Dorg_2898_2&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=TrXdSxjvnbbVQz7EzR5r0aE93lZMSdCiIUQT2wt8E3I&e=
[2]
https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_openssl_openssl_commit_13da3ad00c80e1da816ca27f6c15b0ecee1bb0b8&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=RWVuli-BT8E2IsW3rAA9MtqCVPZahNk8k7yqxEbgTT4&e=

Few code comments inline.


---
  .travis.yml  |  5 +++--
  .travis/linux-prepare.sh | 18 ++
  2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5676d9748..c99f26815 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ addons:
apt:
  packages:
- bc
-  - gcc-multilib
- libssl-dev
- llvm-dev
- libjemalloc1
@@ -24,7 +23,6 @@ addons:
- libelf-dev
- selinux-policy-dev
- libunbound-dev
-  - libunbound-dev:i386
- libunwind-dev
before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh
@@ -50,6 +48,9 @@ matrix:
  - os: osx
compiler: clang
env: OPTS="--disable-ssl"
+- os: linux-ppc64le
+  compiler: gcc
+  env: OPTS="--disable-ssl"
script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS
  diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh
index e546d32cb..f3a9a6d44 100755
--- a/.travis/linux-prepare.sh
+++ b/.travis/linux-prepare.sh
@@ -15,8 +15,18 @@ cd ..
  pip install --disable-pip-version-check --user six flake8 hacking
  pip install --user --upgrade docutils
  -if [ "$M32" ]; then
-# 32-bit and 64-bit libunwind can not be installed at the same 
time.
-# This will remove the 64-bit libunwind and install 32-bit 
version.

-sudo apt-get install -y libunwind-dev:i386
+# Include packages needed by specific architectures.
+if [ $TRAVIS_ARCH == amd64 ]; then
+   sudo apt-get install -y \
+libunbound-dev:i386 \
+gcc-multilib


These packages are only needed for 32bit build, so you may just move 
them

to the command that installs 32bit version of libunwind.
And since you're not building 32bit with ppc64le, the code could look 
like:


if [ "$M32" ]; then
   # install 32 bit libs
fi

if [ $TRAVIS_ARCH == ppc64le ]; then
   # install ppc64le specific things.
fi


Agreed.




+
+if [ "$M32" ]; then
+   # 32-bit and 64-bit libunwind can not be installed at the same 
time.
+   # This will remove the 64-bit libunwind and install 32-bit 
version.

+   sudo apt-get install -y libunwind-dev:i386
+fi
+
+elif [ $TRAVIS_ARCH == ppc64le ]; then
+   sudo apt-get install -y flex


Why 'flex' is needed?


I found that Flex was needed for kernel builds, but since I am not doing 
that yet I can remove it.


Thanks for the review,  I will post a V2 patch shortly.



Best regar

Re: [ovs-dev] [PATCH ] travis: support ppc64le builds

2019-10-29 Thread Ilya Maximets

On 28.10.2019 22:22, David Wilder wrote:

Add support for travis-ci ppc64le builds.

- Updated matrix in .travis.yml to include a ppc64le build.
- Added support to install packages needed by specific architectures.

To keep the total build time at an acceptable level only a single build
job is included in the matrix for ppc64le.

A build report example can be found here [1]
[0] http://travis-ci.org/
[1] https://travis-ci.org/djlwilder/ovs/builds/604098141

Signed-off-by: David Wilder 


Hi David,
Thanks for working on this. I have a couple of question regarding
ppc64le support by TravisCI.  It seems that they are not supporting
this architecture officially and refusing[1] to solve any issues that
appears while using it. There also no official documentation.
It's kind of a hidden feature that some projects are using for their
own risk [2]. Do you know why this happens or maybe you have some
insights about what is going on/how it works?
The API is also a bit strange because Travis started to officially
support arm builds, but this is done via 'arch' knob, not the 'os'.
Will it be changed over time for ppc64le?

[1] 
https://travis-ci.community/t/ppc64le-arch-support-on-travis-ci-com-vs-travis-ci-org/2898/2
[2] 
https://github.com/openssl/openssl/commit/13da3ad00c80e1da816ca27f6c15b0ecee1bb0b8

Few code comments inline.


---
  .travis.yml  |  5 +++--
  .travis/linux-prepare.sh | 18 ++
  2 files changed, 17 insertions(+), 6 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 5676d9748..c99f26815 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -14,7 +14,6 @@ addons:
apt:
  packages:
- bc
-  - gcc-multilib
- libssl-dev
- llvm-dev
- libjemalloc1
@@ -24,7 +23,6 @@ addons:
- libelf-dev
- selinux-policy-dev
- libunbound-dev
-  - libunbound-dev:i386
- libunwind-dev
  
  before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh

@@ -50,6 +48,9 @@ matrix:
  - os: osx
compiler: clang
env: OPTS="--disable-ssl"
+- os: linux-ppc64le
+  compiler: gcc
+  env: OPTS="--disable-ssl"
  
  script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS
  
diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh

index e546d32cb..f3a9a6d44 100755
--- a/.travis/linux-prepare.sh
+++ b/.travis/linux-prepare.sh
@@ -15,8 +15,18 @@ cd ..
  pip install --disable-pip-version-check --user six flake8 hacking
  pip install --user --upgrade docutils
  
-if [ "$M32" ]; then

-# 32-bit and 64-bit libunwind can not be installed at the same time.
-# This will remove the 64-bit libunwind and install 32-bit version.
-sudo apt-get install -y libunwind-dev:i386
+# Include packages needed by specific architectures.
+if [ $TRAVIS_ARCH == amd64 ]; then
+   sudo apt-get install -y \
+libunbound-dev:i386 \
+gcc-multilib


These packages are only needed for 32bit build, so you may just move them
to the command that installs 32bit version of libunwind.
And since you're not building 32bit with ppc64le, the code could look like:

if [ "$M32" ]; then
   # install 32 bit libs
fi

if [ $TRAVIS_ARCH == ppc64le ]; then
   # install ppc64le specific things.
fi


+
+if [ "$M32" ]; then
+   # 32-bit and 64-bit libunwind can not be installed at the same time.
+   # This will remove the 64-bit libunwind and install 32-bit version.
+   sudo apt-get install -y libunwind-dev:i386
+fi
+
+elif [ $TRAVIS_ARCH == ppc64le ]; then
+   sudo apt-get install -y flex


Why 'flex' is needed?

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] Cómo hacer una presentación profesional

2019-10-29 Thread Herramientas de presentación de proyectos
Me da mucho gusto saludarte.

Es, para mí, un placer poder invitarte a nuestro Curso en Línea "¿Cómo 
presentar resultados a nivel profesional?", que se estará llevando a cabo el 
día Miércoles 13 de Noviembre con un horario de 10:00 a 14:00 hrs. (hora del 
centro de México)

Presentar un proyecto a nivel profesional utilizando las herramientas adecuadas 
y teniendo una planeación óptima.

Ejes Temáticos:

- ¿Cómo presentar un proyecto?
- Elaboración de proyectos.
- Etapas de un proyecto.
- Herramientas a utilizar en una presentación.
- Organización y control de tiempos.
- Como hacer una propuesta de trabajo profesional.
- Como presentar resultados a nivel profesional.


Teléfonos: (045) 55 15 54 66 30 - (045) 55 85567293 - (045) 5530167085 

Solicita información respondiendo a este correo con la palabra Power, junto con 
los siguientes datos:

Nombre:
Correo electrónico:
Número telefónico:


Qué tengas un excelente día

Si desea dejar de recibir nuestra promoción favor de responder con la palabra 
baja o enviar un correo a bajas@ innovalearn.net


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv5] netdev-linux: Detect numa node id.

2019-10-29 Thread Ilya Maximets

On 23.10.2019 23:08, William Tu wrote:

The patch detects the numa node id from the name of the netdev,
by reading the '/sys/class/net//device/numa_node'.
If not available, ex: virtual device, or any error happens,
return numa id 0.  Currently only the afxdp netdev type uses it,
other linux netdev types are disabled due to no use case.

Signed-off-by: William Tu 
---
v5:
   Feedbacks from Ilya
   - reafactor the error handling
   - add mutex lock
   - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/601947245

v4:
   Feedbacks from Eelco
   - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/599308893

v3:
   Feedbacks from Ilya and Eelco
   - update doc, afxdp.rst
   - fix coding style
   - fix limit of numa node max, by using ovs_numa_numa_id_is_valid
   - move the function to netdev-linux
   - cache the result of numa_id
   - add security check for netdev name
   - use fscanf instead of read and convert to int
   - revise some error message content

v2:
   address feedback from Eelco
fix memory leak of xaspintf
log using INFO instead of WARN
---
  Documentation/intro/install/afxdp.rst |  1 -
  lib/netdev-afxdp.c| 11 --
  lib/netdev-linux-private.h|  2 ++
  lib/netdev-linux.c| 63 ++-
  4 files changed, 64 insertions(+), 13 deletions(-)

diff --git a/Documentation/intro/install/afxdp.rst 
b/Documentation/intro/install/afxdp.rst
index 820e9d993d8f..6c00c4ad1356 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -309,7 +309,6 @@ Below is a script using namespaces and veth peer::
  
  Limitations/Known Issues

  
-#. Device's numa ID is always 0, need a way to find numa id from a netdev.
  #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A possible
 work-around is to use OpenFlow meter action.
  #. Most of the tests are done using i40e single port. Multiple ports and
diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 8eb270c150e8..cfd93fab9f45 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -543,17 +543,6 @@ out:
  return err;
  }
  
-int

-netdev_afxdp_get_numa_id(const struct netdev *netdev)
-{
-/* FIXME: Get netdev's PCIe device ID, then find
- * its NUMA node id.
- */
-VLOG_INFO("FIXME: Device %s always use numa id 0.",
-  netdev_get_name(netdev));
-return 0;
-}
-
  static void
  xsk_remove_xdp_program(uint32_t ifindex, int xdpmode)
  {
diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h
index a350be151147..c8f2be47b10b 100644
--- a/lib/netdev-linux-private.h
+++ b/lib/netdev-linux-private.h
@@ -96,6 +96,8 @@ struct netdev_linux {
  /* LAG information. */
  bool is_lag_master; /* True if the netdev is a LAG master. */
  
+int numa_id;/* NUMA node id. */

+
  #ifdef HAVE_AF_XDP
  /* AF_XDP information. */
  struct xsk_socket_info **xsks;
diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index f4819237370a..4dd05493b238 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -236,6 +236,7 @@ enum {
  VALID_VPORT_STAT_ERROR  = 1 << 5,
  VALID_DRVINFO   = 1 << 6,
  VALID_FEATURES  = 1 << 7,
+VALID_NUMA_ID   = 1 << 8,
  };
  
  struct linux_lag_slave {
@@ -1391,6 +1392,66 @@ netdev_linux_tap_batch_send(struct netdev *netdev_,
  return 0;
  }
  
+static int

+netdev_linux_get_numa_id__(const struct netdev *netdev_)


Since you've created a separate function, I think we need to add
a thread safety annotation here: OVS_REQUIRES(netdev->mutex)

For this purpose, you'll better pass the instance of 'netdev_linux'
in this function.  Original netdev only used to get the netdev name,
so it'll be not hard to use &netdev->up instead for that purpose.

Second thing is that we should preserve numa cache on netlink updates.
Something like this:

diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
index 4dd05493b..0495a035f 100644
--- a/lib/netdev-linux.c
+++ b/lib/netdev-linux.c
@@ -821,9 +821,9 @@ netdev_linux_update__(struct netdev_linux *dev,
 {
 if (rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) {
 if (change->nlmsg_type == RTM_NEWLINK) {
-/* Keep drv-info, and ip addresses. */
+/* Keep drv-info, ip addresses and NUMA id. */
 netdev_linux_changed(dev, change->ifi_flags,
- VALID_DRVINFO | VALID_IN);
+ VALID_DRVINFO | VALID_IN | VALID_NUMA_ID);
 
 /* Update netdev from rtnl-change msg. */

 if (change->mtu) {
---

Without this change we're re-checking the numa id after each time device
flags/mtu/whatever updated.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures

2019-10-29 Thread Numan Siddique
On Tue, Oct 29, 2019 at 8:16 PM Russell Bryant  wrote:
>
> Thanks, I just hit this and the patch fixes it for me.
>
> Acked-by: Russell Bryant 

Thanks for the reviews. I applied this patch to master.

Numan

>
> On Tue, Oct 29, 2019 at 9:56 AM Han Zhou  wrote:
> >
> > Acked-by: hz...@ovn.org
> >
> > On Tue, Oct 29, 2019 at 5:26 AM  wrote:
> >
> > > From: Numan Siddique 
> > >
> > > The commit b740928656a1("testsuite: Use ovn-macros instead of
> > > ofproto-macros.")
> > > missed updating the system test suite files to include ovn-macros.at. This
> > > patch adds it.
> > >
> > > CC: Han Zhou 
> > > Signed-off-by: Numan Siddique 
> > > ---
> > >  tests/system-kmod-testsuite.at  | 1 +
> > >  tests/system-userspace-testsuite.at | 1 +
> > >  2 files changed, 2 insertions(+)
> > >
> > > diff --git a/tests/system-kmod-testsuite.at b/tests/
> > > system-kmod-testsuite.at
> > > index 6c8478093..2ccd9f1ce 100644
> > > --- a/tests/system-kmod-testsuite.at
> > > +++ b/tests/system-kmod-testsuite.at
> > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
> > >  m4_include([tests/ovs-macros.at])
> > >  m4_include([tests/ovsdb-macros.at])
> > >  m4_include([tests/ofproto-macros.at])
> > > +m4_include([tests/ovn-macros.at])
> > >  m4_include([tests/system-common-macros.at])
> > >  m4_include([tests/system-kmod-macros.at])
> > >
> > > diff --git a/tests/system-userspace-testsuite.at b/tests/
> > > system-userspace-testsuite.at
> > > index 784eedd2c..4022ae620 100644
> > > --- a/tests/system-userspace-testsuite.at
> > > +++ b/tests/system-userspace-testsuite.at
> > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
> > >  m4_include([tests/ovs-macros.at])
> > >  m4_include([tests/ovsdb-macros.at])
> > >  m4_include([tests/ofproto-macros.at])
> > > +m4_include([tests/ovn-macros.at])
> > >  m4_include([tests/system-userspace-macros.at])
> > >  m4_include([tests/system-common-macros.at])
> > >
> > > --
> > > 2.21.0
> > >
> > >
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
>
>
> --
> Russell Bryant
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCHv2] netdev-afxdp: Enable loading XDP program.

2019-10-29 Thread Eelco Chaudron

See comments inline…

On 5 Oct 2019, at 2:12, William Tu wrote:


Now netdev-afxdp always forwards all packets to userspace because
it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'.
There are some cases when users want to keep packets in kernel instead
of sending to userspace, for example, management traffic such as SSH
should be processed in kernel.

The patch enables loading the user-provide XDP program by doing
  $ovs-vsctl -- set int afxdp-p0 options:xdpobj=


Should we maybe change the name to xdp-obj?


So users can implement their filtering logic or traffic steering idea
in their XDP program, and rest of the traffic passes to AF_XDP socket
handled by OVS.

Signed-off-by: William Tu 
---
v2:
A couple fixes and remove RFC
---
 Documentation/intro/install/afxdp.rst |  46 
 lib/netdev-afxdp.c| 137 
++

 lib/netdev-linux-private.h|   2 +
 3 files changed, 169 insertions(+), 16 deletions(-)

diff --git a/Documentation/intro/install/afxdp.rst 
b/Documentation/intro/install/afxdp.rst

index 820e9d993d8f..f484bdb8249b 100644
--- a/Documentation/intro/install/afxdp.rst
+++ b/Documentation/intro/install/afxdp.rst
@@ -265,6 +265,52 @@ Or, use OVS pmd tool::
   ovs-appctl dpif-netdev/pmd-stats-show


+Loading Custom XDP Program
+--
+By defailt, netdev-afxdp always forwards all packets to userspace 
because
+it is using libbpf's default XDP program. There are some cases when 
users
+want to keep packets in kernel instead of sending to userspace, for 
example,
+management traffic such as SSH should be processed in kernel. This 
can be

+done by loading the user-provided XDP program::
+
+  ovs-vsctl -- set int afxdp-p0 options:xdpobj=
+
+So users can implement their filtering logic or traffic steering idea
+in their XDP program, and rest of the traffic passes to AF_XDP socket
+handled by OVS. Below is a sample C program compiled under kernel's
+samples/bpf/.
+
+.. code-block:: c
+
+  #include 
+  #include "bpf_helpers.h"
+
+  /* OVS will associate map 'xsks_map' to xsk socket. */
+  struct bpf_map_def SEC("maps") xsks_map = {
+  .type = BPF_MAP_TYPE_XSKMAP,
+  .key_size = sizeof(int),
+  .value_size = sizeof(int),
+  .max_entries = 32,
+  };
+


Maybe something like this could be added to make sure it compiles with 
older kernels:


#if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0)

/* Kernel version before 5.3 needed an additional map */
struct bpf_map_def SEC("maps") qidconf_map = {
.type = BPF_MAP_TYPE_ARRAY,
.key_size = sizeof(int),
.value_size = sizeof(int),
.max_entries = 64,
};
#endi

See also here:
https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c


+  SEC("xdp_sock")
+  int xdp_sock_prog(struct xdp_md *ctx)
+  {
+  int index = ctx->rx_queue_index;
+
+  /* Customized by user.
+   * For example
+   * 1) filter out all SSH traffic and return XDP_PASS
+   *for kernel to process.
+   * 2) Drop unwanted packet by returning XDP_DROP.
+   */
+
+  /* Rest of packets goes to AF_XDP. */
+  return bpf_redirect_map(&xsks_map, index, 0);
+  }
+  char _license[] SEC("license") = "GPL";
+
+
 Example Script
 --

diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c
index 6e01803272aa..231d6473fb67 100644
--- a/lib/netdev-afxdp.c
+++ b/lib/netdev-afxdp.c
@@ -21,6 +21,7 @@
 #include "netdev-afxdp.h"
 #include "netdev-afxdp-pool.h"

+#include 
 #include 
 #include 
 #include 
@@ -29,6 +30,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 

@@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS);
 #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char 
*)base))


 static struct xsk_socket_info *xsk_configure(int ifindex, int 
xdp_queue_id,

- int mode);
+ int mode, bool 
use_default_xdp);

 static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode);
 static void xsk_destroy(struct xsk_socket_info *xsk);
 static int xsk_configure_all(struct netdev *netdev);
@@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux 
OVS_UNUSED)

 ovs_mutex_unlock(&unused_pools_mutex);
 }

+static int
+xsk_load_prog(const char *path, struct bpf_object **obj,
+  int *prog_fd)
+{
+struct bpf_prog_load_attr attr = {
+.prog_type = BPF_PROG_TYPE_XDP,
+.file = path,
+};
+
+if (bpf_prog_load_xattr(&attr, obj, prog_fd)) {
+VLOG_WARN("Can't load XDP program at '%s'", path);


I think it should be a VLOG_ERR()


+return EINVAL;
+}
+
+VLOG_INFO("Loaded XDP program at '%s'", path);
+return 0;
+}
+
+static int
+xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info 
*xsk,

+  int queue_id)
+{
+struct bpf_map *map;
+int xsks_map_fd;
+int xsk_fd;
+int ret;
+
+  

[ovs-dev] [PATCH ovn] ovn-controller: Fix use of dangling pointers in I-P runtime_data.

2019-10-29 Thread Dumitru Ceara
The incremental processing engine might stop a run before the
en_runtime_data node is processed. In such cases the ed_runtime_data
fields might contain pointers to already deleted SB records. For
example, if a port binding corresponding to a patch port is removed from
the SB database and the incremental processing engine aborts before the
en_runtime_data node is processed then the corresponding local_datapath
hashtable entry in ed_runtime_data is stale and will store a pointer to
the already freed sbrec_port_binding record.

This will cause invalid memory accesses in various places (e.g.,
pinctrl_run() -> prepare_ipv6_ras()).

To fix the issue we need a way to track how each node was processed
during an engine run. This commit transforms the 'changed' field in
struct engine_node in a 'state' field. Possible node states are:
- "Stale": data in the node is not up to date with the DB.
- "Updated": data in the node is valid but was updated during
  the last run of the engine.
- "Valid": data in the node is valid and didn't change during
  the last run of the engine.
- "Aborted": during the last run, processing was aborted for
  this node.

The commit also simplifies the logic of calling engine_run and
engine_need_run in order to reduce the number of external variables
required to track the result of the last engine execution.

Functions that need to be called from the main loop and depend on
various data contents of the engine's nodes are now called only if
the data is up to date.

CC: Han Zhou 
Fixes: a6b7d9f4f047 ("ovn-controller: Fix flow installation latency
caused by recompute.")
Signed-off-by: Dumitru Ceara 
---
 controller/ovn-controller.c | 144 +---
 lib/inc-proc-eng.c  | 117 +--
 lib/inc-proc-eng.h  |  47 ---
 3 files changed, 221 insertions(+), 87 deletions(-)

diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index 9ab98be..8e07d5e 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -758,10 +758,10 @@ en_ofctrl_is_connected_run(struct engine_node *node)
 (struct ed_type_ofctrl_is_connected *)node->data;
 if (data->connected != ofctrl_is_connected()) {
 data->connected = !data->connected;
-node->changed = true;
+engine_set_node_state(node, EN_UPDATED);
 return;
 }
-node->changed = false;
+engine_set_node_state(node, EN_VALID);
 }
 
 struct ed_type_addr_sets {
@@ -811,7 +811,7 @@ en_addr_sets_run(struct engine_node *node)
 addr_sets_init(as_table, &as->addr_sets);
 
 as->change_tracked = false;
-node->changed = true;
+engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -830,11 +830,14 @@ addr_sets_sb_address_set_handler(struct engine_node *node)
 addr_sets_update(as_table, &as->addr_sets, &as->new,
  &as->deleted, &as->updated);
 
-node->changed = !sset_is_empty(&as->new) || !sset_is_empty(&as->deleted)
-|| !sset_is_empty(&as->updated);
+if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) ||
+!sset_is_empty(&as->updated)) {
+engine_set_node_state(node, EN_UPDATED);
+} else {
+engine_set_node_state(node, EN_VALID);
+}
 
 as->change_tracked = true;
-node->changed = true;
 return true;
 }
 
@@ -885,7 +888,7 @@ en_port_groups_run(struct engine_node *node)
 port_groups_init(pg_table, &pg->port_groups);
 
 pg->change_tracked = false;
-node->changed = true;
+engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -904,11 +907,14 @@ port_groups_sb_port_group_handler(struct engine_node 
*node)
 port_groups_update(pg_table, &pg->port_groups, &pg->new,
  &pg->deleted, &pg->updated);
 
-node->changed = !sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted)
-|| !sset_is_empty(&pg->updated);
+if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) ||
+!sset_is_empty(&pg->updated)) {
+engine_set_node_state(node, EN_UPDATED);
+} else {
+engine_set_node_state(node, EN_VALID);
+}
 
 pg->change_tracked = true;
-node->changed = true;
 return true;
 }
 
@@ -1091,7 +1097,7 @@ en_runtime_data_run(struct engine_node *node)
 update_ct_zones(local_lports, local_datapaths, ct_zones,
 ct_zone_bitmap, pending_ct_zones);
 
-node->changed = true;
+engine_set_node_state(node, EN_UPDATED);
 }
 
 static bool
@@ -1157,10 +1163,10 @@ en_mff_ovn_geneve_run(struct engine_node *node)
 enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id();
 if (data->mff_ovn_geneve != mff_ovn_geneve) {
 data->mff_ovn_geneve = mff_ovn_geneve;
-node->changed = true;
+engine_set_node_state(node, EN_UPDATED);
 return;
 }
-node->changed = false;
+engine_set_node_state(node, EN_VALID);
 }
 
 struct ed_type_flow_output

Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures

2019-10-29 Thread Russell Bryant
Thanks, I just hit this and the patch fixes it for me.

Acked-by: Russell Bryant 

On Tue, Oct 29, 2019 at 9:56 AM Han Zhou  wrote:
>
> Acked-by: hz...@ovn.org
>
> On Tue, Oct 29, 2019 at 5:26 AM  wrote:
>
> > From: Numan Siddique 
> >
> > The commit b740928656a1("testsuite: Use ovn-macros instead of
> > ofproto-macros.")
> > missed updating the system test suite files to include ovn-macros.at. This
> > patch adds it.
> >
> > CC: Han Zhou 
> > Signed-off-by: Numan Siddique 
> > ---
> >  tests/system-kmod-testsuite.at  | 1 +
> >  tests/system-userspace-testsuite.at | 1 +
> >  2 files changed, 2 insertions(+)
> >
> > diff --git a/tests/system-kmod-testsuite.at b/tests/
> > system-kmod-testsuite.at
> > index 6c8478093..2ccd9f1ce 100644
> > --- a/tests/system-kmod-testsuite.at
> > +++ b/tests/system-kmod-testsuite.at
> > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
> >  m4_include([tests/ovs-macros.at])
> >  m4_include([tests/ovsdb-macros.at])
> >  m4_include([tests/ofproto-macros.at])
> > +m4_include([tests/ovn-macros.at])
> >  m4_include([tests/system-common-macros.at])
> >  m4_include([tests/system-kmod-macros.at])
> >
> > diff --git a/tests/system-userspace-testsuite.at b/tests/
> > system-userspace-testsuite.at
> > index 784eedd2c..4022ae620 100644
> > --- a/tests/system-userspace-testsuite.at
> > +++ b/tests/system-userspace-testsuite.at
> > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
> >  m4_include([tests/ovs-macros.at])
> >  m4_include([tests/ovsdb-macros.at])
> >  m4_include([tests/ofproto-macros.at])
> > +m4_include([tests/ovn-macros.at])
> >  m4_include([tests/system-userspace-macros.at])
> >  m4_include([tests/system-common-macros.at])
> >
> > --
> > 2.21.0
> >
> >
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev



-- 
Russell Bryant
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures

2019-10-29 Thread Han Zhou
Acked-by: hz...@ovn.org

On Tue, Oct 29, 2019 at 5:26 AM  wrote:

> From: Numan Siddique 
>
> The commit b740928656a1("testsuite: Use ovn-macros instead of
> ofproto-macros.")
> missed updating the system test suite files to include ovn-macros.at. This
> patch adds it.
>
> CC: Han Zhou 
> Signed-off-by: Numan Siddique 
> ---
>  tests/system-kmod-testsuite.at  | 1 +
>  tests/system-userspace-testsuite.at | 1 +
>  2 files changed, 2 insertions(+)
>
> diff --git a/tests/system-kmod-testsuite.at b/tests/
> system-kmod-testsuite.at
> index 6c8478093..2ccd9f1ce 100644
> --- a/tests/system-kmod-testsuite.at
> +++ b/tests/system-kmod-testsuite.at
> @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
>  m4_include([tests/ovs-macros.at])
>  m4_include([tests/ovsdb-macros.at])
>  m4_include([tests/ofproto-macros.at])
> +m4_include([tests/ovn-macros.at])
>  m4_include([tests/system-common-macros.at])
>  m4_include([tests/system-kmod-macros.at])
>
> diff --git a/tests/system-userspace-testsuite.at b/tests/
> system-userspace-testsuite.at
> index 784eedd2c..4022ae620 100644
> --- a/tests/system-userspace-testsuite.at
> +++ b/tests/system-userspace-testsuite.at
> @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
>  m4_include([tests/ovs-macros.at])
>  m4_include([tests/ovsdb-macros.at])
>  m4_include([tests/ofproto-macros.at])
> +m4_include([tests/ovn-macros.at])
>  m4_include([tests/system-userspace-macros.at])
>  m4_include([tests/system-common-macros.at])
>
> --
> 2.21.0
>
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] dpif-netdev: log rxq assignment in isolated pmd

2019-10-29 Thread Gowrishankar Muthukrishnan
Could this patch be reviewed please.

Thanks,
Gowrishankar

On Tue, Oct 22, 2019 at 3:06 PM Gowrishankar Muthukrishnan <
gmuth...@redhat.com> wrote:

> There is no log about isolated rxq assignment in a pmd today, which
> sometimes could be useful to trace rxq/pmd pinning, when debugging
> with log. Ovs-appctl dpif-netdev/pmd-rxq-show reports about it
> already, but logging is helpful to trace pinning in time.
>
> Changes:
>   v2: init numa_id for the info.
>
> Reported-at: https://bugzilla.redhat.com/1728616
> Signed-off-by: Gowrishankar Muthukrishnan 
> ---
>  lib/dpif-netdev.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 4546b55e8..4d7c36787 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -4573,6 +4573,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned)
> OVS_REQUIRES(dp->port_mutex)
>  q->pmd = pmd;
>  pmd->isolated = true;
>  dp_netdev_pmd_unref(pmd);
> +numa_id = netdev_get_numa_id(q->port->netdev);
> +VLOG_INFO("Core %d on numa node %d assigned port
> \'%s\' "
> +  "rx queue %d.", pmd->core_id, numa_id,
> +  netdev_rxq_get_name(q->rx),
> +  netdev_rxq_get_queue_id(q->rx));
>  }
>  } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) {
>  uint64_t cycle_hist = 0;
> --
> 2.17.2
>
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/1] ovsdb_execute_mutate: removed unused ovsdb_row pointer variable

2019-10-29 Thread Damijan Skvarc
Signed-off-by: Damijan Skvarc 
---
 ovsdb/execution.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ovsdb/execution.c b/ovsdb/execution.c
index c55a0b7..f2cf3d7 100644
--- a/ovsdb/execution.c
+++ b/ovsdb/execution.c
@@ -562,7 +562,6 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 const struct json *mutations_json;
 struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER(&condition);
 struct ovsdb_mutation_set mutations = OVSDB_MUTATION_SET_INITIALIZER;
-struct ovsdb_row *row = NULL;
 struct mutate_row_cbdata mr;
 struct ovsdb_error *error;
 
@@ -595,7 +594,6 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct 
ovsdb_parser *parser,
 json_object_put(result, "count", json_integer_create(mr.n_matches));
 }
 
-ovsdb_row_destroy(row);
 ovsdb_mutation_set_destroy(&mutations);
 ovsdb_condition_destroy(&condition);
 
-- 
2.7.4

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v6 ovn] Add DNSSL support to OVN

2019-10-29 Thread Numan Siddique
On Tue, Oct 29, 2019 at 5:49 PM Lorenzo Bianconi
 wrote:
>
> Introduce the possibility to specify a DNSSL option to Router
> Advertisement packets. DNS Search list can be specified using
> 'dnssl' tag in the ipv6_ra_configs column of logical router
> port table
>
> Signed-off-by: Lorenzo Bianconi 

Thanks. I applied this patch to master.

Numan

> ---
> Changes since v5:
> - add comment describing how domain names are encoded
>
> Changes since v4:
> - update documentation
> - fix possible array out of bounds issue in packet_put_ra_dnssl_opt
> - fix strtok_r usage
> ---
>  controller/pinctrl.c | 63 
>  lib/ovn-l7.h | 11 
>  northd/ovn-northd.c  |  4 +++
>  ovn-nb.xml   |  5 
>  tests/ovn.at | 30 ++---
>  5 files changed, 103 insertions(+), 10 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index f105ebb5c..655ba54a1 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
>  struct lport_addresses prefixes;
>  struct in6_addr rdnss;
>  bool has_rdnss;
> +struct ds dnssl;
>  };
>
>  struct ipv6_ra_state {
> @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
>  {
>  if (config) {
>  destroy_lport_addresses(&config->prefixes);
> +ds_destroy(&config->dnssl);
>  free(config);
>  }
>  }
> @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  nd_ra_min_interval_default(config->max_interval));
>  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
>  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> +ds_init(&config->dnssl);
>
>  const char *address_mode = smap_get(&pb->options, 
> "ipv6_ra_address_mode");
>  if (!address_mode) {
> @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  }
>  config->has_rdnss = !!rdnss;
>
> +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> +if (dnssl) {
> +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> +}
> +
>  return config;
>
>  fail:
> @@ -2354,6 +2362,57 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t 
> num,
>prev_l4_size + len * 
> 8));
>  }
>
> +static void
> +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> +char *dnssl_list)
> +{
> +size_t prev_l4_size = dp_packet_l4_size(b);
> +size_t size = sizeof(struct ovs_nd_dnssl);
> +struct ip6_hdr *nh = dp_packet_l3(b);
> +char *t0, *r0, dnssl[255] = {};
> +int i = 0;
> +
> +/* Multiple DNS Search List must be 'comma' separated
> + * (e.g. "a.b.c, d.e.f"). Domain names must be encoded
> + * as described in Section 3.1 of RFC1035.
> + * (e.g if dns list is a.b.c,www.ovn.org, it will be encoded as:
> + * 01 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00
> + */
> +for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
> + t0 = strtok_r(NULL, ",", &r0)) {
> +char *t1, *r1;
> +
> +size += strlen(t0) + 2;
> +if (size > sizeof(dnssl)) {
> +return;
> +}
> +
> +for (t1 = strtok_r(t0, ".", &r1); t1;
> + t1 = strtok_r(NULL, ".", &r1)) {
> +dnssl[i++] = strlen(t1);
> +memcpy(&dnssl[i], t1, strlen(t1));
> +i += strlen(t1);
> +}
> +dnssl[i++] = 0;
> +}
> +size = ROUND_UP(size, 8);
> +nh->ip6_plen = htons(prev_l4_size + size);
> +
> +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof 
> *nd_dnssl);
> +nd_dnssl->type = ND_OPT_DNSSL;
> +nd_dnssl->len = size / 8;
> +nd_dnssl->reserved = 0;
> +put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
> +
> +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
> +
> +struct ovs_ra_msg *ra = dp_packet_l4(b);
> +ra->icmph.icmp6_cksum = 0;
> +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
> +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
> +  prev_l4_size + size));
> +}
> +
>  /* Called with in the pinctrl_handler thread context. */
>  static long long int
>  ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
> @@ -2382,6 +2441,10 @@ ipv6_ra_send(struct rconn *swconn, struct 
> ipv6_ra_state *ra)
>  packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
>  &ra->config->rdnss);
>  }
> +if (ra->config->dnssl.length) {
> +packet_put_ra_dnssl_opt(&packet, htonl(0x),
> +ra->config->dnssl.string);
> +}
>
>  uint64_t ofpacts_stub[4096 / 8];
>  struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> index 12cee6329

Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN

2019-10-29 Thread Numan Siddique
On Tue, Oct 29, 2019 at 4:39 PM Lorenzo Bianconi
 wrote:
>
> > On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi
> >  wrote:
> > >
> > > Introduce the possibility to specify a DNSSL option to Router
> > > Advertisement packets. DNS Search list can be specified using
> > > 'dnssl' tag in the ipv6_ra_configs column of logical router
> > > port table
> > >
> > > Signed-off-by: Lorenzo Bianconi 
> > > ---
> > > Changes since v4:
> > > - update documentation
> > > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt
> > > - fix strtok_r usage
> > > ---
> > >  controller/pinctrl.c | 59 
> > >  lib/ovn-l7.h | 11 +
> > >  northd/ovn-northd.c  |  4 +++
> > >  ovn-nb.xml   |  5 
> > >  tests/ovn.at | 30 ++
> > >  5 files changed, 99 insertions(+), 10 deletions(-)
> > >
> > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > > index f105ebb5c..cddf98648 100644
> > > --- a/controller/pinctrl.c
> > > +++ b/controller/pinctrl.c
> > > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
> > >  struct lport_addresses prefixes;
> > >  struct in6_addr rdnss;
> > >  bool has_rdnss;
> > > +struct ds dnssl;
> > >  };
> > >
> > >  struct ipv6_ra_state {
> > > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
> > >  {
> > >  if (config) {
> > >  destroy_lport_addresses(&config->prefixes);
> > > +ds_destroy(&config->dnssl);
> > >  free(config);
> > >  }
> > >  }
> > > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct 
> > > sbrec_port_binding *pb)
> > >  nd_ra_min_interval_default(config->max_interval));
> > >  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", 
> > > ND_MTU_DEFAULT);
> > >  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> > > +ds_init(&config->dnssl);
> > >
> > >  const char *address_mode = smap_get(&pb->options, 
> > > "ipv6_ra_address_mode");
> > >  if (!address_mode) {
> > > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct 
> > > sbrec_port_binding *pb)
> > >  }
> > >  config->has_rdnss = !!rdnss;
> > >
> > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> > > +if (dnssl) {
> > > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> > > +}
> > > +
> > >  return config;
> > >
> > >  fail:
> > > @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, 
> > > uint8_t num,
> > >prev_l4_size + len 
> > > * 8));
> > >  }
> > >
> > > +static void
> > > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> > > +char *dnssl_list)
> > > +{
> > > +size_t prev_l4_size = dp_packet_l4_size(b);
> > > +size_t size = sizeof(struct ovs_nd_dnssl);
> > > +struct ip6_hdr *nh = dp_packet_l3(b);
> > > +char *t0, *r0, dnssl[255] = {};
> > > +int i = 0;
> > > +
> > > +/* Multiple DNS Search List must be 'comma' separated
> > > + * (e.g. "a.b.c, d.e.f") */
> >
> > Can you please add a comment on how the dns list will be encode with an 
> > example.
> > Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01
> > 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00
> >
> > This will really help in understanding what's going on. Also can you
> > link the RFC version.
>
> Hi Numan,
>
> Thx for the review. Ack, will do in v6
>
> >
> > > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
> > > + t0 = strtok_r(NULL, ",", &r0)) {
> > > +char *t1, *r1;
> > > +
> > > +size += strlen(t0) + 2;
> > > +if (size > sizeof(dnssl)) {
> >
> > For the sake of simplicity let's assume the length of dnssl is 10.
> > Then if the dns list is a.b.c,d.e.f then this will cause the buffer to
> > overflow right ?
> >
> > For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < 
> > 10.
> > In the next outer for loop with t0 = d.e.f, the size will be again 7
> > and it will cause the inner for loop  to copy the contents beyond
> > the size of dnssl right ?
> >
> > I think this code needs to be refined a bit to take all these things
> > into consideration.
> >
>
> I guess the check is fine since size value will not be reset:
> size += strlen(t0) + 2;

Ah right. I didn't notice "+=". Thank for pointing this out.


>
> Regards,
> Lorenzo
>
>
> > Thanks
> > Numan
> >
> > > +return;
> > > +}
> > > +
> >
> >
> >
> > > +for (t1 = strtok_r(t0, ".", &r1); t1;
> > > + t1 = strtok_r(NULL, ".", &r1)) {
> > > +dnssl[i++] = strlen(t1);
> > > +memcpy(&dnssl[i], t1, strlen(t1));
> > > +i += strlen(t1);
> > > +}
> > > +dnssl[i++] = 0;
> > > +}
> > > +size = ROUND_UP(size, 8);
> > > +nh->ip6_plen = htons(prev_l4_size + size);
> > > +
> > > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uni

[ovs-dev] [PATCH ovn] Fix system-ovn test failures

2019-10-29 Thread numans
From: Numan Siddique 

The commit b740928656a1("testsuite: Use ovn-macros instead of ofproto-macros.")
missed updating the system test suite files to include ovn-macros.at. This
patch adds it.

CC: Han Zhou 
Signed-off-by: Numan Siddique 
---
 tests/system-kmod-testsuite.at  | 1 +
 tests/system-userspace-testsuite.at | 1 +
 2 files changed, 2 insertions(+)

diff --git a/tests/system-kmod-testsuite.at b/tests/system-kmod-testsuite.at
index 6c8478093..2ccd9f1ce 100644
--- a/tests/system-kmod-testsuite.at
+++ b/tests/system-kmod-testsuite.at
@@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
 m4_include([tests/ovs-macros.at])
 m4_include([tests/ovsdb-macros.at])
 m4_include([tests/ofproto-macros.at])
+m4_include([tests/ovn-macros.at])
 m4_include([tests/system-common-macros.at])
 m4_include([tests/system-kmod-macros.at])
 
diff --git a/tests/system-userspace-testsuite.at 
b/tests/system-userspace-testsuite.at
index 784eedd2c..4022ae620 100644
--- a/tests/system-userspace-testsuite.at
+++ b/tests/system-userspace-testsuite.at
@@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS])
 m4_include([tests/ovs-macros.at])
 m4_include([tests/ovsdb-macros.at])
 m4_include([tests/ofproto-macros.at])
+m4_include([tests/ovn-macros.at])
 m4_include([tests/system-userspace-macros.at])
 m4_include([tests/system-common-macros.at])
 
-- 
2.21.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v6 ovn] Add DNSSL support to OVN

2019-10-29 Thread Lorenzo Bianconi
Introduce the possibility to specify a DNSSL option to Router
Advertisement packets. DNS Search list can be specified using
'dnssl' tag in the ipv6_ra_configs column of logical router
port table

Signed-off-by: Lorenzo Bianconi 
---
Changes since v5:
- add comment describing how domain names are encoded

Changes since v4:
- update documentation
- fix possible array out of bounds issue in packet_put_ra_dnssl_opt
- fix strtok_r usage
---
 controller/pinctrl.c | 63 
 lib/ovn-l7.h | 11 
 northd/ovn-northd.c  |  4 +++
 ovn-nb.xml   |  5 
 tests/ovn.at | 30 ++---
 5 files changed, 103 insertions(+), 10 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f105ebb5c..655ba54a1 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
 struct lport_addresses prefixes;
 struct in6_addr rdnss;
 bool has_rdnss;
+struct ds dnssl;
 };
 
 struct ipv6_ra_state {
@@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
 {
 if (config) {
 destroy_lport_addresses(&config->prefixes);
+ds_destroy(&config->dnssl);
 free(config);
 }
 }
@@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb)
 nd_ra_min_interval_default(config->max_interval));
 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
 config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
+ds_init(&config->dnssl);
 
 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
 if (!address_mode) {
@@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
*pb)
 }
 config->has_rdnss = !!rdnss;
 
+const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
+if (dnssl) {
+ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
+}
+
 return config;
 
 fail:
@@ -2354,6 +2362,57 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num,
   prev_l4_size + len * 8));
 }
 
+static void
+packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
+char *dnssl_list)
+{
+size_t prev_l4_size = dp_packet_l4_size(b);
+size_t size = sizeof(struct ovs_nd_dnssl);
+struct ip6_hdr *nh = dp_packet_l3(b);
+char *t0, *r0, dnssl[255] = {};
+int i = 0;
+
+/* Multiple DNS Search List must be 'comma' separated
+ * (e.g. "a.b.c, d.e.f"). Domain names must be encoded
+ * as described in Section 3.1 of RFC1035.
+ * (e.g if dns list is a.b.c,www.ovn.org, it will be encoded as:
+ * 01 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00
+ */
+for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
+ t0 = strtok_r(NULL, ",", &r0)) {
+char *t1, *r1;
+
+size += strlen(t0) + 2;
+if (size > sizeof(dnssl)) {
+return;
+}
+
+for (t1 = strtok_r(t0, ".", &r1); t1;
+ t1 = strtok_r(NULL, ".", &r1)) {
+dnssl[i++] = strlen(t1);
+memcpy(&dnssl[i], t1, strlen(t1));
+i += strlen(t1);
+}
+dnssl[i++] = 0;
+}
+size = ROUND_UP(size, 8);
+nh->ip6_plen = htons(prev_l4_size + size);
+
+struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof *nd_dnssl);
+nd_dnssl->type = ND_OPT_DNSSL;
+nd_dnssl->len = size / 8;
+nd_dnssl->reserved = 0;
+put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
+
+dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
+
+struct ovs_ra_msg *ra = dp_packet_l4(b);
+ra->icmph.icmp6_cksum = 0;
+uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
+ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
+  prev_l4_size + size));
+}
+
 /* Called with in the pinctrl_handler thread context. */
 static long long int
 ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
@@ -2382,6 +2441,10 @@ ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state 
*ra)
 packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
 &ra->config->rdnss);
 }
+if (ra->config->dnssl.length) {
+packet_put_ra_dnssl_opt(&packet, htonl(0x),
+ra->config->dnssl.string);
+}
 
 uint64_t ofpacts_stub[4096 / 8];
 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
index 12cee6329..5fc370bf5 100644
--- a/lib/ovn-l7.h
+++ b/lib/ovn-l7.h
@@ -231,6 +231,17 @@ struct nd_rdnss_opt {
 };
 BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt));
 
+/* DNSSL option RFC 6106 */
+#define ND_OPT_DNSSL31
+#define ND_DNSSL_OPT_LEN8
+struct ovs_nd_dnssl {
+u_int8_t type;  /* ND_OPT_DNSSL */
+u_int8_t len;   /* >= 2 */
+ovs_be16 re

Re: [ovs-dev] [PATCH] rhel: openvswitch-fedora.spec.in: Fix output redirect to null device

2019-10-29 Thread Simon Horman
On Mon, Oct 28, 2019 at 10:37:44AM +0200, Roi Dayan wrote:
> Add missing slash.
> 
> Fixes: 0447019df7c6 ("fedora-spec: added systemd post/postun/pre/preun 
> sections")
> Signed-off-by: Roi Dayan 

Thanks Roi,

I've applied this to master and backported it as far back as branch-2.5.

> ---
>  rhel/openvswitch-fedora.spec.in | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in
> index 3a87c6d0c339..0f635210147c 100644
> --- a/rhel/openvswitch-fedora.spec.in
> +++ b/rhel/openvswitch-fedora.spec.in
> @@ -340,7 +340,7 @@ fi
>  %else
>  # Package install, not upgrade
>  if [ $1 -eq 1 ]; then
> -/bin/systemctl daemon-reload >dev/null || :
> +/bin/systemctl daemon-reload >/dev/null || :
>  fi
>  %endif
>  
> -- 
> 2.8.4
> 
> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
> 
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table

2019-10-29 Thread Tonghao Zhang
On Tue, Oct 29, 2019 at 3:38 PM Pravin Shelar  wrote:
>
> On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang  
> wrote:
> >
> > On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar  wrote:
> > >
> > > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang  
> > > wrote:
> > > >
> > > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar  wrote:
> > > > >
> > > ...
> > >
> ...
> > > >  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> > > > @@ -400,10 +458,9 @@ static struct table_instance
> > > > *table_instance_rehash(struct table_instance *ti,
> > > > return new_ti;
> > > >  }
> > > >
> > > > -int ovs_flow_tbl_flush(struct flow_table *flow_table)
> > > > +int ovs_flow_tbl_flush(struct flow_table *table)
> > > >  {
> > > > -   struct table_instance *old_ti, *new_ti;
> > > > -   struct table_instance *old_ufid_ti, *new_ufid_ti;
> > > > +   struct table_instance *new_ti, *new_ufid_ti;
> > > >
> > > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
> > > > if (!new_ti)
> > > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table 
> > > > *flow_table)
> > > > if (!new_ufid_ti)
> > > > goto err_free_ti;
> > > >
> > > > -   old_ti = ovsl_dereference(flow_table->ti);
> > > > -   old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> > > > +   table_instance_destroy(table, true);
> > > >
> > > This would destroy running table causing unnecessary flow miss. Lets
> > > keep current scheme of setting up new table before destroying current
> > > one.
> > >
> > > > -   rcu_assign_pointer(flow_table->ti, new_ti);
> 
> ...
> >  /* Must be called with OVS mutex held. */
> >  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
> >  {
> > @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table
> > *table, struct sw_flow *flow)
> > struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
> >
> > BUG_ON(table->count == 0);
> > -   hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
> > -   table->count--;
> > -   if (ovs_identifier_is_ufid(&flow->id)) {
> > -   hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
> > -   table->ufid_count--;
> > -   }
> > -
> > -   /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
> > -* accessible as long as the RCU read lock is held.
> > -*/
> > -   flow_mask_remove(table, flow->mask);
> > +   table_instance_remove(table, ti, ufid_ti, flow, true);
> >  }
> Lets rename table_instance_remove() to imply it is freeing a flow.
hi Pravin, the function ovs_flow_free will free the flow actually. In
-ovs_flow_cmd_del
ovs_flow_tbl_remove
...
ovs_flow_free

In -table_instance_destroy
table_instance_remove
ovs_flow_free

But if rename the table_instance_remove, table_instance_flow_free ?
> Otherwise looks good.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovs v3 2/2] netdev-dpdk: Add dpdkvdpa port

2019-10-29 Thread Noa Levy



> -Original Message-
> From: William Tu [mailto:u9012...@gmail.com]
> Sent: Monday, October 28, 2019 10:46 PM
> To: Noa Levy 
> Cc: ovs-dev@openvswitch.org; Oz Shlomo ; Majd
> Dibbiny ; Ameer Mahagneh
> ; Eli Britstein 
> Subject: Re: [ovs-dev] [PATCH ovs v3 2/2] netdev-dpdk: Add dpdkvdpa port
> 
> Hi Noa,
> 
> Thanks for your reply.
> 
> > > > > Hi Noa,
> > > > >
> > > > > I have a couple more questions. I'm still at the learning stage
> > > > > of this new feature, thanks in advance for your patience.
> > > > >
> > > > > On Thu, Oct 17, 2019 at 02:16:56PM +0300, Noa Ezra wrote:
> > > > > > dpdkvdpa netdev works with 3 components:
> > > > > > vhost-user socket, vdpa device: real vdpa device or a VF and
> > > > > > representor of "vdpa device".
> > > > >
> > > > > What NIC card support this feature?
> > > > > I don't have real vdpa device, can I use Intel X540 VF feature?
> > > > >
> > > >
> > > > This feature will have two modes, SW and HW.
> > > > The SW mode doesn't depend on a real vdpa device and allows you to
> > > > use this feature even if you don't have a NIC that support it.
> > Although you need to use representors, so you need your NIC to support
> it.
> > > > The HW mode will be implemented in the future and will use a real
> > > > vdpa device. It will be better to use the HW mode if you have a
> > > > NIC that support
> > > it.
> > > >
> > > > For now, we only support the SW mode, when vdpa will have support
> > > > in dpdk, we will add the HW mode to OVS.
> > > >
> > > > > >
> > > > > > In order to add a new vDPA port, add a new port to existing
> > > > > > bridge with type dpdkvdpa and vDPA options:
> > > > > > ovs-vsctl add-port br0 vdpa0 -- set Interface vdpa0 type=dpdkvdpa
> > > > > >options:vdpa-socket-path=
> > > > > >options:vdpa-accelerator-devargs=
> > > > > >options:dpdk-devargs=,representor=[id]
> > > > > >
> > > > > > On this command OVS will create a new netdev:
> > > > > > 1. Register vhost-user-client device.
> > > > > > 2. Open and configure VF dpdk port.
> > > > > > 3. Open and configure representor dpdk port.
> > > > > >
> > > > > > The new netdev will use netdev_rxq_recv() function in order to
> > > > > > receive packets from VF and push to vhost-user and receive
> > > > > > packets from vhost-user and push to VF.
> > > > > >
> > > > > > Signed-off-by: Noa Ezra 
> > > > > > Reviewed-by: Oz Shlomo 
> > > > > > ---
> > > > > >  Documentation/automake.mk   |   1 +
> > > > > >  Documentation/topics/dpdk/index.rst |   1 +
> > > > > >  Documentation/topics/dpdk/vdpa.rst  |  90
> 
> > > > > >  NEWS|   1 +
> > > > > >  lib/netdev-dpdk.c   | 162
> > > > > 
> > > > > >  vswitchd/vswitch.xml|  25 ++
> > > > > >  6 files changed, 280 insertions(+)  create mode 100644
> > > > > > Documentation/topics/dpdk/vdpa.rst
> > > > > >
> > > > > > diff --git a/Documentation/automake.mk
> > > > b/Documentation/automake.mk
> > > > > > index cd68f3b..ee574bc 100644
> > > > > > --- a/Documentation/automake.mk
> > > > > > +++ b/Documentation/automake.mk
> > > > > > @@ -43,6 +43,7 @@ DOC_SOURCE = \
> > > > > > Documentation/topics/dpdk/ring.rst \
> > > > > > Documentation/topics/dpdk/vdev.rst \
> > > > > > Documentation/topics/dpdk/vhost-user.rst \
> > > > > > +   Documentation/topics/dpdk/vdpa.rst \
> > > > > > Documentation/topics/fuzzing/index.rst \
> > > > > > Documentation/topics/fuzzing/what-is-fuzzing.rst \
> > > > > >
> > > > > > Documentation/topics/fuzzing/ovs-fuzzing-infrastructure.rst \
> > > > > > diff --git a/Documentation/topics/dpdk/index.rst
> > > > > > b/Documentation/topics/dpdk/index.rst
> > > > > > index cf24a7b..c1d4ea7 100644
> > > > > > --- a/Documentation/topics/dpdk/index.rst
> > > > > > +++ b/Documentation/topics/dpdk/index.rst
> > > > > > @@ -41,3 +41,4 @@ The DPDK Datapath
> > > > > > /topics/dpdk/pdump
> > > > > > /topics/dpdk/jumbo-frames
> > > > > > /topics/dpdk/memory
> > > > > > +   /topics/dpdk/vdpa
> > > > > > diff --git a/Documentation/topics/dpdk/vdpa.rst
> > > > > > b/Documentation/topics/dpdk/vdpa.rst
> > > > > > new file mode 100644
> 
> 
> 
> > > > > > +2357,23 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq,
> > > > > > +struct
> > > > > dp_packet_batch *batch,
> > > > > >  return 0;
> > > > > >  }
> > > > > >
> > > > > > +static int
> > > > > > +netdev_dpdk_vdpa_rxq_recv(struct netdev_rxq *rxq,
> > > > > > +  struct dp_packet_batch *batch,
> > > > > > +  int *qfill) {
> > > > > > +struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev);
> > > > > > +int fwd_rx;
> > > > > > +int ret;
> > > > > > +
> > > > > > +fwd_rx = netdev_dpdk_vdpa_rxq_recv_impl(dev->relay,
> > > > > > + rxq->queue_id);
> > > > > I'm still not clear about the above function.
> > > > > So netdev_dpdk_vdpa_recv_impl()

Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN

2019-10-29 Thread Lorenzo Bianconi
> On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi
>  wrote:
> >
> > Introduce the possibility to specify a DNSSL option to Router
> > Advertisement packets. DNS Search list can be specified using
> > 'dnssl' tag in the ipv6_ra_configs column of logical router
> > port table
> >
> > Signed-off-by: Lorenzo Bianconi 
> > ---
> > Changes since v4:
> > - update documentation
> > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt
> > - fix strtok_r usage
> > ---
> >  controller/pinctrl.c | 59 
> >  lib/ovn-l7.h | 11 +
> >  northd/ovn-northd.c  |  4 +++
> >  ovn-nb.xml   |  5 
> >  tests/ovn.at | 30 ++
> >  5 files changed, 99 insertions(+), 10 deletions(-)
> >
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index f105ebb5c..cddf98648 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
> >  struct lport_addresses prefixes;
> >  struct in6_addr rdnss;
> >  bool has_rdnss;
> > +struct ds dnssl;
> >  };
> >
> >  struct ipv6_ra_state {
> > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
> >  {
> >  if (config) {
> >  destroy_lport_addresses(&config->prefixes);
> > +ds_destroy(&config->dnssl);
> >  free(config);
> >  }
> >  }
> > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> > *pb)
> >  nd_ra_min_interval_default(config->max_interval));
> >  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", 
> > ND_MTU_DEFAULT);
> >  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> > +ds_init(&config->dnssl);
> >
> >  const char *address_mode = smap_get(&pb->options, 
> > "ipv6_ra_address_mode");
> >  if (!address_mode) {
> > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct 
> > sbrec_port_binding *pb)
> >  }
> >  config->has_rdnss = !!rdnss;
> >
> > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> > +if (dnssl) {
> > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> > +}
> > +
> >  return config;
> >
> >  fail:
> > @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t 
> > num,
> >prev_l4_size + len * 
> > 8));
> >  }
> >
> > +static void
> > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> > +char *dnssl_list)
> > +{
> > +size_t prev_l4_size = dp_packet_l4_size(b);
> > +size_t size = sizeof(struct ovs_nd_dnssl);
> > +struct ip6_hdr *nh = dp_packet_l3(b);
> > +char *t0, *r0, dnssl[255] = {};
> > +int i = 0;
> > +
> > +/* Multiple DNS Search List must be 'comma' separated
> > + * (e.g. "a.b.c, d.e.f") */
> 
> Can you please add a comment on how the dns list will be encode with an 
> example.
> Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01
> 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00
> 
> This will really help in understanding what's going on. Also can you
> link the RFC version.

Hi Numan,

Thx for the review. Ack, will do in v6

> 
> > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
> > + t0 = strtok_r(NULL, ",", &r0)) {
> > +char *t1, *r1;
> > +
> > +size += strlen(t0) + 2;
> > +if (size > sizeof(dnssl)) {
> 
> For the sake of simplicity let's assume the length of dnssl is 10.
> Then if the dns list is a.b.c,d.e.f then this will cause the buffer to
> overflow right ?
> 
> For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < 10.
> In the next outer for loop with t0 = d.e.f, the size will be again 7
> and it will cause the inner for loop  to copy the contents beyond
> the size of dnssl right ?
> 
> I think this code needs to be refined a bit to take all these things
> into consideration.
> 

I guess the check is fine since size value will not be reset:
size += strlen(t0) + 2;

Regards,
Lorenzo


> Thanks
> Numan
> 
> > +return;
> > +}
> > +
> 
> 
> 
> > +for (t1 = strtok_r(t0, ".", &r1); t1;
> > + t1 = strtok_r(NULL, ".", &r1)) {
> > +dnssl[i++] = strlen(t1);
> > +memcpy(&dnssl[i], t1, strlen(t1));
> > +i += strlen(t1);
> > +}
> > +dnssl[i++] = 0;
> > +}
> > +size = ROUND_UP(size, 8);
> > +nh->ip6_plen = htons(prev_l4_size + size);
> > +
> > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof 
> > *nd_dnssl);
> > +nd_dnssl->type = ND_OPT_DNSSL;
> > +nd_dnssl->len = size / 8;
> > +nd_dnssl->reserved = 0;
> > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
> > +
> > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
> > +
> > +struct ovs_ra_msg *ra = dp_packet_l4(b);
> > +ra->icmph.icmp6_cksum = 0;
> > +uint32_t icmp_csum = packet_csum_pseu

Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN

2019-10-29 Thread Numan Siddique
On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi
 wrote:
>
> Introduce the possibility to specify a DNSSL option to Router
> Advertisement packets. DNS Search list can be specified using
> 'dnssl' tag in the ipv6_ra_configs column of logical router
> port table
>
> Signed-off-by: Lorenzo Bianconi 
> ---
> Changes since v4:
> - update documentation
> - fix possible array out of bounds issue in packet_put_ra_dnssl_opt
> - fix strtok_r usage
> ---
>  controller/pinctrl.c | 59 
>  lib/ovn-l7.h | 11 +
>  northd/ovn-northd.c  |  4 +++
>  ovn-nb.xml   |  5 
>  tests/ovn.at | 30 ++
>  5 files changed, 99 insertions(+), 10 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index f105ebb5c..cddf98648 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
>  struct lport_addresses prefixes;
>  struct in6_addr rdnss;
>  bool has_rdnss;
> +struct ds dnssl;
>  };
>
>  struct ipv6_ra_state {
> @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
>  {
>  if (config) {
>  destroy_lport_addresses(&config->prefixes);
> +ds_destroy(&config->dnssl);
>  free(config);
>  }
>  }
> @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  nd_ra_min_interval_default(config->max_interval));
>  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
>  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> +ds_init(&config->dnssl);
>
>  const char *address_mode = smap_get(&pb->options, 
> "ipv6_ra_address_mode");
>  if (!address_mode) {
> @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  }
>  config->has_rdnss = !!rdnss;
>
> +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> +if (dnssl) {
> +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> +}
> +
>  return config;
>
>  fail:
> @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t 
> num,
>prev_l4_size + len * 
> 8));
>  }
>
> +static void
> +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> +char *dnssl_list)
> +{
> +size_t prev_l4_size = dp_packet_l4_size(b);
> +size_t size = sizeof(struct ovs_nd_dnssl);
> +struct ip6_hdr *nh = dp_packet_l3(b);
> +char *t0, *r0, dnssl[255] = {};
> +int i = 0;
> +
> +/* Multiple DNS Search List must be 'comma' separated
> + * (e.g. "a.b.c, d.e.f") */

Can you please add a comment on how the dns list will be encode with an example.
Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01
61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00

This will really help in understanding what's going on. Also can you
link the RFC version.

> +for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
> + t0 = strtok_r(NULL, ",", &r0)) {
> +char *t1, *r1;
> +
> +size += strlen(t0) + 2;
> +if (size > sizeof(dnssl)) {

For the sake of simplicity let's assume the length of dnssl is 10.
Then if the dns list is a.b.c,d.e.f then this will cause the buffer to
overflow right ?

For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < 10.
In the next outer for loop with t0 = d.e.f, the size will be again 7
and it will cause the inner for loop  to copy the contents beyond
the size of dnssl right ?

I think this code needs to be refined a bit to take all these things
into consideration.

Thanks
Numan

> +return;
> +}
> +



> +for (t1 = strtok_r(t0, ".", &r1); t1;
> + t1 = strtok_r(NULL, ".", &r1)) {
> +dnssl[i++] = strlen(t1);
> +memcpy(&dnssl[i], t1, strlen(t1));
> +i += strlen(t1);
> +}
> +dnssl[i++] = 0;
> +}
> +size = ROUND_UP(size, 8);
> +nh->ip6_plen = htons(prev_l4_size + size);
> +
> +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof 
> *nd_dnssl);
> +nd_dnssl->type = ND_OPT_DNSSL;
> +nd_dnssl->len = size / 8;
> +nd_dnssl->reserved = 0;
> +put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
> +
> +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
> +
> +struct ovs_ra_msg *ra = dp_packet_l4(b);
> +ra->icmph.icmp6_cksum = 0;
> +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
> +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
> +  prev_l4_size + size));
> +}
> +
>  /* Called with in the pinctrl_handler thread context. */
>  static long long int
>  ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
> @@ -2382,6 +2437,10 @@ ipv6_ra_send(struct rconn *swconn, struct 
> ipv6_ra_state *ra)
>  packet_put_ra_

[ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN

2019-10-29 Thread Lorenzo Bianconi
Introduce the possibility to specify a DNSSL option to Router
Advertisement packets. DNS Search list can be specified using
'dnssl' tag in the ipv6_ra_configs column of logical router
port table

Signed-off-by: Lorenzo Bianconi 
---
Changes since v4:
- update documentation
- fix possible array out of bounds issue in packet_put_ra_dnssl_opt
- fix strtok_r usage
---
 controller/pinctrl.c | 59 
 lib/ovn-l7.h | 11 +
 northd/ovn-northd.c  |  4 +++
 ovn-nb.xml   |  5 
 tests/ovn.at | 30 ++
 5 files changed, 99 insertions(+), 10 deletions(-)

diff --git a/controller/pinctrl.c b/controller/pinctrl.c
index f105ebb5c..cddf98648 100644
--- a/controller/pinctrl.c
+++ b/controller/pinctrl.c
@@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
 struct lport_addresses prefixes;
 struct in6_addr rdnss;
 bool has_rdnss;
+struct ds dnssl;
 };
 
 struct ipv6_ra_state {
@@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
 {
 if (config) {
 destroy_lport_addresses(&config->prefixes);
+ds_destroy(&config->dnssl);
 free(config);
 }
 }
@@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb)
 nd_ra_min_interval_default(config->max_interval));
 config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
 config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
+ds_init(&config->dnssl);
 
 const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode");
 if (!address_mode) {
@@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
*pb)
 }
 config->has_rdnss = !!rdnss;
 
+const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
+if (dnssl) {
+ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
+}
+
 return config;
 
 fail:
@@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num,
   prev_l4_size + len * 8));
 }
 
+static void
+packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
+char *dnssl_list)
+{
+size_t prev_l4_size = dp_packet_l4_size(b);
+size_t size = sizeof(struct ovs_nd_dnssl);
+struct ip6_hdr *nh = dp_packet_l3(b);
+char *t0, *r0, dnssl[255] = {};
+int i = 0;
+
+/* Multiple DNS Search List must be 'comma' separated
+ * (e.g. "a.b.c, d.e.f") */
+for (t0 = strtok_r(dnssl_list, ",", &r0); t0;
+ t0 = strtok_r(NULL, ",", &r0)) {
+char *t1, *r1;
+
+size += strlen(t0) + 2;
+if (size > sizeof(dnssl)) {
+return;
+}
+
+for (t1 = strtok_r(t0, ".", &r1); t1;
+ t1 = strtok_r(NULL, ".", &r1)) {
+dnssl[i++] = strlen(t1);
+memcpy(&dnssl[i], t1, strlen(t1));
+i += strlen(t1);
+}
+dnssl[i++] = 0;
+}
+size = ROUND_UP(size, 8);
+nh->ip6_plen = htons(prev_l4_size + size);
+
+struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof *nd_dnssl);
+nd_dnssl->type = ND_OPT_DNSSL;
+nd_dnssl->len = size / 8;
+nd_dnssl->reserved = 0;
+put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
+
+dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
+
+struct ovs_ra_msg *ra = dp_packet_l4(b);
+ra->icmph.icmp6_cksum = 0;
+uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
+ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
+  prev_l4_size + size));
+}
+
 /* Called with in the pinctrl_handler thread context. */
 static long long int
 ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
@@ -2382,6 +2437,10 @@ ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state 
*ra)
 packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
 &ra->config->rdnss);
 }
+if (ra->config->dnssl.length) {
+packet_put_ra_dnssl_opt(&packet, htonl(0x),
+ra->config->dnssl.string);
+}
 
 uint64_t ofpacts_stub[4096 / 8];
 struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
index 12cee6329..5fc370bf5 100644
--- a/lib/ovn-l7.h
+++ b/lib/ovn-l7.h
@@ -231,6 +231,17 @@ struct nd_rdnss_opt {
 };
 BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt));
 
+/* DNSSL option RFC 6106 */
+#define ND_OPT_DNSSL31
+#define ND_DNSSL_OPT_LEN8
+struct ovs_nd_dnssl {
+u_int8_t type;  /* ND_OPT_DNSSL */
+u_int8_t len;   /* >= 2 */
+ovs_be16 reserved;
+ovs_16aligned_be32 lifetime;
+};
+BUILD_ASSERT_DECL(ND_DNSSL_OPT_LEN == sizeof(struct ovs_nd_dnssl));
+
 #define DHCPV6_DUID_LL  3
 #define DHCPV6_HW_TYPE_ETH  1
 
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 33fe9363c..194e4bf4a 100644
--- a/northd/ovn-northd.

Re: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev

2019-10-29 Thread Noa Levy



> -Original Message-
> From: Roni Bar Yanai
> Sent: Monday, October 28, 2019 10:25 PM
> To: Ilya Maximets ; Noa Levy ;
> ovs-dev@openvswitch.org
> Cc: Oz Shlomo ; Majd Dibbiny
> ; Ameer Mahagneh ; Eli
> Britstein ; William Tu ; Simon
> Horman 
> Subject: RE: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev
> 
> Hi ilya,
> please see inline
> 
> >-Original Message-
> >From: Ilya Maximets 
> >Sent: Monday, October 28, 2019 3:46 PM
> >To: Noa Levy ; ovs-dev@openvswitch.org; Roni Bar
> >Yanai 
> >Cc: Oz Shlomo ; Majd Dibbiny
> ;
> >Ameer Mahagneh ; Eli Britstein
> >; William Tu ; Simon Horman
> >
> >Subject: Re: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev
> >
> >On 17.10.2019 13:16, Noa Ezra wrote:
> >> There are two approaches to communicate with a guest, using virtIO or
> >> SR-IOV.
> >> SR-IOV allows working with port representor which is attached to the
> >> OVS and a matching VF is given with pass-through to the VM.
> >> HW rules can process packets from up-link and direct them to the VF
> >> without going through SW (OVS) and therefore SR-IOV gives the best
> >> performance.
> >> However, SR-IOV architecture requires that the guest will use a
> >> driver which is specific to the underlying HW. Specific HW driver has
> >> two main
> >> drawbacks:
> >> 1. Breaks virtualization in some sense (VM aware of the HW), can also
> >> limit the type of images supported.
> >> 2. Less natural support for live migration.
> >>
> >> Using virtIO interface solves both problems, but reduces performance
> >> and causes losing of some functionality, for example, for some HW
> >> offload, working directly with virtIO cannot be supported.
> >> In order to solve this conflict, we created a new netdev type-dpdkvdpa.
> >> The new netdev is basically similar to a regular dpdk netdev, but it
> >> has some additional functionality for transferring packets from
> >> virtIO guest (VM) to a VF and vice versa. With this solution we can
> >> benefit both SR-IOV and virtIO.
> >> vDPA netdev is designed to support both SW and HW use-cases.
> >> HW mode will be used to configure vDPA capable devices. The support
> >> for this mode is on progress in the dpdk community.
> >> SW acceleration is used to leverage SR-IOV offloads to virtIO guests
> >> by relaying packets between VF and virtio devices and as a pre-step
> >> for supporting vDPA in HW mode.
> >>
> >> Running example:
> >> 1. Configure OVS bridge and ports:
> >> ovs-vsctl add-br br0-ovs -- set bridge br0-ovs datapath_type=netdev
> >> ovs-vsctl add-port br0-ovs pf -- set Interface pf type=dpdk options: \
> >>  dpdk-devargs=
> >> ovs-vsctl add-port br0 vdpa0 -- set Interface vdpa0 type=dpdkvdpa \
> >>  options:vdpa-socket-path= \
> >>  options:vdpa-accelerator-devargs= \
> >>  options:dpdk-devargs=,representor=[id] 2. Run a
> >> virtIO guest (VM) in server mode that creates the socket of
> >> the vDPA port.
> >> 3. Send traffic.
> >>
> >> Noa Ezra (2):
> >>netdev-dpdk-vdpa: Introduce dpdkvdpa netdev
> >>netdev-dpdk: Add dpdkvdpa port
> >>
> >>   Documentation/automake.mk   |   1 +
> >>   Documentation/topics/dpdk/index.rst |   1 +
> >>   Documentation/topics/dpdk/vdpa.rst  |  90 +
> >>   NEWS|   1 +
> >>   lib/automake.mk |   4 +-
> >>   lib/netdev-dpdk-vdpa.c  | 750
> >
> >>   lib/netdev-dpdk-vdpa.h  |  54 +++
> >>   lib/netdev-dpdk.c   | 162 
> >>   vswitchd/vswitch.xml|  25 ++
> >>   9 files changed, 1087 insertions(+), 1 deletion(-)
> >>   create mode 100644 Documentation/topics/dpdk/vdpa.rst
> >>   create mode 100755 lib/netdev-dpdk-vdpa.c
> >>   create mode 100644 lib/netdev-dpdk-vdpa.h
> >>
> >
> >Hi, everyone.
> >
> >So, I have a few questions (mostly to Roni?):
> >
> >1. What happened to idea of implementing this as a DPDK vdev?
> We wanted to solve both OVS-kernel and OVS-DPDK issue.
> The main argument against it was that we allow to define ports that are not
> OF ports on the switch. regardless of how useful we think this feature can be
> it breaks something fundamental, and looking back I agree.
> The vdev, was a workaround, but after speaking with some of our DPDK
> experts, they had similar arguments, this time for DPDK world. You create a
> port that never gets packet and never sends packets. This totally doesn't
> make sense even if it can be useful in some cases. When we remove the
> kernel form the equation, we are left with vDPA.
> In fact we followed your suggestion of having SW vDPA and HW vDPA.
> This makes total sense, for example open stack can configure vDPA and let
> the OVS probe the device and go with HW or SW, leaving same functionality
> and same configuration.
> 
> >
> >2. What was the results of "direct output optimization" patch [1] testing?
> >At this point I also have to mention that OVS is going to ha

Re: [ovs-dev] [PATCH] lacp: warn transmit failure of lacp pdu

2019-10-29 Thread Gowrishankar Muthukrishnan
Hi Ben,
Idea is to log lacp specific tx failure. A one step further as in
ofproto_dpif_send_packet has to make the debug info more generic, even
though it would cater all of its calling functions.
For example, bundle_send_learning_packets reports such tx failures for
gratuitous packets. I think, we need to handle such warning at the calling
place itself to debug quickly.

Thanks,
Gowrishankar

On Fri, Oct 25, 2019 at 2:47 AM Ben Pfaff  wrote:

> On Mon, Oct 21, 2019 at 07:34:36PM +0530, Gowrishankar Muthukrishnan wrote:
> > It might be difficult to trace whether LACP PDU tx (as in
> > response) was successful when the pdu was not transmitted by
> > egress slave for various reasons (including resource contention
> > within NIC) and only way to trace its fate is by looking at
> > ofproto->stats.tx_[packets/bytes] and slave port stats.
> >
> > Adding a warning when there is tx failure could help user
> > debug at the root of this problem.
> >
> > Signed-off-by: Gowrishankar Muthukrishnan 
>
> This logic seems like it's probably true for all users of
> ofproto_dpif_send_packet(), not just LACP PDU transmission.  Would it be
> a good idea to simply make ofproto_dpif_send_packet() log a warning when
> tx fails?
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH ovn] ovndb-servers.ocf: Change from 'openvswitch' to 'ovn' in default vars.

2019-10-29 Thread Numan Siddique
On Tue, Oct 29, 2019 at 1:31 PM Numan Siddique  wrote:
>
> On Mon, Oct 28, 2019 at 10:57 PM Ben Pfaff  wrote:
> >
> > On Mon, Oct 28, 2019 at 10:25:34AM -0700, Ben Pfaff wrote:
> > > On Mon, Oct 28, 2019 at 08:04:15PM +0530, num...@ovn.org wrote:
> > > > From: Numan Siddique 
> > > >
> > > > CC: Aliasgar Ginwala 
> > > > Signed-off-by: Numan Siddique 
> > >
> > > Should we introduce something into the distro packaging, at the same
> > > time, to migrate from the old to the new locations automatically?
> >
> > Or, alternatively, should the script tolerate finding these in the old
> > location?
> >
> > Should we add something to the ovn upgrade guide talking about the
> > migration?
>
> I think we will have the same issue even for the ovn NB and SB dbs.
> The default path of OVN NB and
> SB dbs is - /etc/openvswitch before the split and after the split it
> is /etc/ovn.
>
> I agree that we should add upgrade guide taking about the migration. I
> will work on it.
>
> In this particular case, the default values are changed. And these
> values can always be overridden
> when starting or updating the pacemaker service like
>  - pcs resource create ovn-dbs ... ovn_nb_db_privkey=  ...
>
> In the case of openstack tripleo, where OVN services are run as
> containers, we have to mount
> the host directories properly. If the host path -
> /var/lib/openvswitch/ovn is mounted as /etc/openvswitch
> inside the container, we need to also mount the same host path to
> /etc/ovn so that the upgrade to
> new OVN post split will be seamless.
>
> I thought that this can be handled in distro install scripts like
> creating a symbolic link, but this
> doesn't address the deployments which run ovn services inside containers.
>
> I think we can also enhance the ovn-ctl script to tolerate old
> locations. I will explore more on this.
>
> I will rework this patch to look into both the locations if the user
> has not set these variables when
> starting the service from pacemaker.

I went ahead and applied the below changes as it makes sense. Otherwise
pacemaker ovn resource will fail since it can't find ovn-ctl script in
the old path.

diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf
index cd4742668..4678a1dc9 100755
--- a/utilities/ovndb-servers.ocf
+++ b/utilities/ovndb-servers.ocf
@@ -2,7 +2,7 @@

 : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat}
 . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs
-: ${OVN_CTL_DEFAULT="/usr/share/openvswitch/scripts/ovn-ctl"}
+: ${OVN_CTL_DEFAULT="/usr/share/ovn/scripts/ovn-ctl"}
 : ${NB_MASTER_PORT_DEFAULT="6641"}
 : ${NB_MASTER_PROTO_DEFAULT="tcp"}
 : ${SB_MASTER_PORT_DEFAULT="6642"}


Thanks
Numan

>
> Thanks
> Numan
>
>
> > ___
> > dev mailing list
> > d...@openvswitch.org
> > https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 ovn 2/2] Add DNSSL support to OVN

2019-10-29 Thread Lorenzo Bianconi
> On Mon, Oct 28, 2019 at 8:10 PM Lorenzo Bianconi
>  wrote:
> >
> > Introduce the possibility to specify a DNSSL option to Router
> > Advertisement packets. DNS Search list can be specified using
> > 'dnssl' tag in the ipv6_ra_configs column of logical router
> > port table
> >
> > Signed-off-by: Lorenzo Bianconi 
> > ---
> >  controller/pinctrl.c | 51 
> >  lib/ovn-l7.h | 11 ++
> >  northd/ovn-northd.c  |  4 
> >  ovn-nb.xml   |  5 +
> >  tests/ovn.at | 30 +-
> >  5 files changed, 91 insertions(+), 10 deletions(-)
> >
> > diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> > index f105ebb5c..0bd1ef3ef 100644
> > --- a/controller/pinctrl.c
> > +++ b/controller/pinctrl.c
> > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
> >  struct lport_addresses prefixes;
> >  struct in6_addr rdnss;
> >  bool has_rdnss;
> > +struct ds dnssl;
> >  };
> >
> >  struct ipv6_ra_state {
> > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
> >  {
> >  if (config) {
> >  destroy_lport_addresses(&config->prefixes);
> > +ds_destroy(&config->dnssl);
> >  free(config);
> >  }
> >  }
> > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> > *pb)
> >  nd_ra_min_interval_default(config->max_interval));
> >  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", 
> > ND_MTU_DEFAULT);
> >  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> > +ds_init(&config->dnssl);
> >
> >  const char *address_mode = smap_get(&pb->options, 
> > "ipv6_ra_address_mode");
> >  if (!address_mode) {
> > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct 
> > sbrec_port_binding *pb)
> >  }
> >  config->has_rdnss = !!rdnss;
> >
> > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> > +if (dnssl) {
> > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> > +}
> > +
> >  return config;
> >
> >  fail:
> > @@ -2354,6 +2362,45 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t 
> > num,
> >prev_l4_size + len * 
> > 8));
> >  }
> >
> > +static void
> > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> > +char *dnssl_list)
> > +{
> > +char *t0, *r0 = dnssl_list, dnssl[255] = {};
> > +size_t prev_l4_size = dp_packet_l4_size(b);
> > +size_t size = sizeof(struct ovs_nd_dnssl);
> > +struct ip6_hdr *nh = dp_packet_l3(b);
> > +int i = 0;
> > +
> > +while ((t0 = strtok_r(r0, ",", &r0))) {

Hi Numan,

thx for the review.

> 
> Are we not suppose to pass NULL as the first argument to subsequent
> calls of strtok_r ?
> 

right, will fix in v5

> Is there a chance that we might cross dnssl 255 length and have a segfault ?
> 

right, will fix in v5

> Can you please add comments on how the dnssl string is to be encoded ?
> 

ack, will add in v5

> Since the code here, is already handling for multiple dnssl values in
> the "dnssl_list", we can just accept
> multiple values of dnssl right ? Right now the patch is restricting to
> just 1 dnssl for the CMS to configure.

right.

Regards,
Lorenzo

> 
> Thanks
> Numan
> 
> 
> > +char *t1, *r1 = t0;
> > +
> > +size += strlen(t0) + 2;
> > +while ((t1 = strtok_r(r1, ".", &r1))) {
> > +dnssl[i++] = strlen(t1);
> > +memcpy(&dnssl[i], t1, strlen(t1));
> > +i += strlen(t1);
> > +}
> > +dnssl[i++] = 0;
> > +}
> > +size = ROUND_UP(size, 8);
> > +nh->ip6_plen = htons(prev_l4_size + size);
> > +
> > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof 
> > *nd_dnssl);
> > +nd_dnssl->type = ND_OPT_DNSSL;
> > +nd_dnssl->len = size / 8;
> > +nd_dnssl->reserved = 0;
> > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
> > +
> > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
> > +
> > +struct ovs_ra_msg *ra = dp_packet_l4(b);
> > +ra->icmph.icmp6_cksum = 0;
> > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
> > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
> > +  prev_l4_size + 
> > size));
> > +}
> > +
> >  /* Called with in the pinctrl_handler thread context. */
> >  static long long int
> >  ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
> > @@ -2382,6 +2429,10 @@ ipv6_ra_send(struct rconn *swconn, struct 
> > ipv6_ra_state *ra)
> >  packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
> >  &ra->config->rdnss);
> >  }
> > +if (ra->config->dnssl.length) {
> > +packet_put_ra_dnssl_opt(&packet, htonl(0x),
> > +ra->config->dnssl.string);
> > +}
> >
> >  uint64_t ofpacts_stub[40

Re: [ovs-dev] [PATCH ovn] ovndb-servers.ocf: Change from 'openvswitch' to 'ovn' in default vars.

2019-10-29 Thread Numan Siddique
On Mon, Oct 28, 2019 at 10:57 PM Ben Pfaff  wrote:
>
> On Mon, Oct 28, 2019 at 10:25:34AM -0700, Ben Pfaff wrote:
> > On Mon, Oct 28, 2019 at 08:04:15PM +0530, num...@ovn.org wrote:
> > > From: Numan Siddique 
> > >
> > > CC: Aliasgar Ginwala 
> > > Signed-off-by: Numan Siddique 
> >
> > Should we introduce something into the distro packaging, at the same
> > time, to migrate from the old to the new locations automatically?
>
> Or, alternatively, should the script tolerate finding these in the old
> location?
>
> Should we add something to the ovn upgrade guide talking about the
> migration?

I think we will have the same issue even for the ovn NB and SB dbs.
The default path of OVN NB and
SB dbs is - /etc/openvswitch before the split and after the split it
is /etc/ovn.

I agree that we should add upgrade guide taking about the migration. I
will work on it.

In this particular case, the default values are changed. And these
values can always be overridden
when starting or updating the pacemaker service like
 - pcs resource create ovn-dbs ... ovn_nb_db_privkey=  ...

In the case of openstack tripleo, where OVN services are run as
containers, we have to mount
the host directories properly. If the host path -
/var/lib/openvswitch/ovn is mounted as /etc/openvswitch
inside the container, we need to also mount the same host path to
/etc/ovn so that the upgrade to
new OVN post split will be seamless.

I thought that this can be handled in distro install scripts like
creating a symbolic link, but this
doesn't address the deployments which run ovn services inside containers.

I think we can also enhance the ovn-ctl script to tolerate old
locations. I will explore more on this.

I will rework this patch to look into both the locations if the user
has not set these variables when
starting the service from pacemaker.

Thanks
Numan


> ___
> dev mailing list
> d...@openvswitch.org
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table

2019-10-29 Thread Pravin Shelar
On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang  wrote:
>
> On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar  wrote:
> >
> > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang  
> > wrote:
> > >
> > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar  wrote:
> > > >
> > ...
> >
...
> > >  struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti,
> > > @@ -400,10 +458,9 @@ static struct table_instance
> > > *table_instance_rehash(struct table_instance *ti,
> > > return new_ti;
> > >  }
> > >
> > > -int ovs_flow_tbl_flush(struct flow_table *flow_table)
> > > +int ovs_flow_tbl_flush(struct flow_table *table)
> > >  {
> > > -   struct table_instance *old_ti, *new_ti;
> > > -   struct table_instance *old_ufid_ti, *new_ufid_ti;
> > > +   struct table_instance *new_ti, *new_ufid_ti;
> > >
> > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS);
> > > if (!new_ti)
> > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table 
> > > *flow_table)
> > > if (!new_ufid_ti)
> > > goto err_free_ti;
> > >
> > > -   old_ti = ovsl_dereference(flow_table->ti);
> > > -   old_ufid_ti = ovsl_dereference(flow_table->ufid_ti);
> > > +   table_instance_destroy(table, true);
> > >
> > This would destroy running table causing unnecessary flow miss. Lets
> > keep current scheme of setting up new table before destroying current
> > one.
> >
> > > -   rcu_assign_pointer(flow_table->ti, new_ti);

...
>  /* Must be called with OVS mutex held. */
>  void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow)
>  {
> @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table
> *table, struct sw_flow *flow)
> struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti);
>
> BUG_ON(table->count == 0);
> -   hlist_del_rcu(&flow->flow_table.node[ti->node_ver]);
> -   table->count--;
> -   if (ovs_identifier_is_ufid(&flow->id)) {
> -   hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]);
> -   table->ufid_count--;
> -   }
> -
> -   /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be
> -* accessible as long as the RCU read lock is held.
> -*/
> -   flow_mask_remove(table, flow->mask);
> +   table_instance_remove(table, ti, ufid_ti, flow, true);
>  }
Lets rename table_instance_remove() to imply it is freeing a flow.
Otherwise looks good.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4 ovn 2/2] Add DNSSL support to OVN

2019-10-29 Thread Numan Siddique
On Mon, Oct 28, 2019 at 8:10 PM Lorenzo Bianconi
 wrote:
>
> Introduce the possibility to specify a DNSSL option to Router
> Advertisement packets. DNS Search list can be specified using
> 'dnssl' tag in the ipv6_ra_configs column of logical router
> port table
>
> Signed-off-by: Lorenzo Bianconi 
> ---
>  controller/pinctrl.c | 51 
>  lib/ovn-l7.h | 11 ++
>  northd/ovn-northd.c  |  4 
>  ovn-nb.xml   |  5 +
>  tests/ovn.at | 30 +-
>  5 files changed, 91 insertions(+), 10 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index f105ebb5c..0bd1ef3ef 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -2194,6 +2194,7 @@ struct ipv6_ra_config {
>  struct lport_addresses prefixes;
>  struct in6_addr rdnss;
>  bool has_rdnss;
> +struct ds dnssl;
>  };
>
>  struct ipv6_ra_state {
> @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config)
>  {
>  if (config) {
>  destroy_lport_addresses(&config->prefixes);
> +ds_destroy(&config->dnssl);
>  free(config);
>  }
>  }
> @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  nd_ra_min_interval_default(config->max_interval));
>  config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT);
>  config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK;
> +ds_init(&config->dnssl);
>
>  const char *address_mode = smap_get(&pb->options, 
> "ipv6_ra_address_mode");
>  if (!address_mode) {
> @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  }
>  config->has_rdnss = !!rdnss;
>
> +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl");
> +if (dnssl) {
> +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl));
> +}
> +
>  return config;
>
>  fail:
> @@ -2354,6 +2362,45 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t 
> num,
>prev_l4_size + len * 
> 8));
>  }
>
> +static void
> +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime,
> +char *dnssl_list)
> +{
> +char *t0, *r0 = dnssl_list, dnssl[255] = {};
> +size_t prev_l4_size = dp_packet_l4_size(b);
> +size_t size = sizeof(struct ovs_nd_dnssl);
> +struct ip6_hdr *nh = dp_packet_l3(b);
> +int i = 0;
> +
> +while ((t0 = strtok_r(r0, ",", &r0))) {

Are we not suppose to pass NULL as the first argument to subsequent
calls of strtok_r ?

Is there a chance that we might cross dnssl 255 length and have a segfault ?

Can you please add comments on how the dnssl string is to be encoded ?

Since the code here, is already handling for multiple dnssl values in
the "dnssl_list", we can just accept
multiple values of dnssl right ? Right now the patch is restricting to
just 1 dnssl for the CMS to configure.

Thanks
Numan


> +char *t1, *r1 = t0;
> +
> +size += strlen(t0) + 2;
> +while ((t1 = strtok_r(r1, ".", &r1))) {
> +dnssl[i++] = strlen(t1);
> +memcpy(&dnssl[i], t1, strlen(t1));
> +i += strlen(t1);
> +}
> +dnssl[i++] = 0;
> +}
> +size = ROUND_UP(size, 8);
> +nh->ip6_plen = htons(prev_l4_size + size);
> +
> +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof 
> *nd_dnssl);
> +nd_dnssl->type = ND_OPT_DNSSL;
> +nd_dnssl->len = size / 8;
> +nd_dnssl->reserved = 0;
> +put_16aligned_be32(&nd_dnssl->lifetime, lifetime);
> +
> +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl);
> +
> +struct ovs_ra_msg *ra = dp_packet_l4(b);
> +ra->icmph.icmp6_cksum = 0;
> +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
> +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
> +  prev_l4_size + size));
> +}
> +
>  /* Called with in the pinctrl_handler thread context. */
>  static long long int
>  ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
> @@ -2382,6 +2429,10 @@ ipv6_ra_send(struct rconn *swconn, struct 
> ipv6_ra_state *ra)
>  packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
>  &ra->config->rdnss);
>  }
> +if (ra->config->dnssl.length) {
> +packet_put_ra_dnssl_opt(&packet, htonl(0x),
> +ra->config->dnssl.string);
> +}
>
>  uint64_t ofpacts_stub[4096 / 8];
>  struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> index 12cee6329..5fc370bf5 100644
> --- a/lib/ovn-l7.h
> +++ b/lib/ovn-l7.h
> @@ -231,6 +231,17 @@ struct nd_rdnss_opt {
>  };
>  BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt));
>
> +/* DNSSL option RFC 6106 */
> +#define ND_OPT_DNSSL31
> +#define ND_DNSSL_OPT_L

Re: [ovs-dev] [PATCH v4 ovn 1/2] Add RDNSS support to OVN

2019-10-29 Thread Numan Siddique
On Mon, Oct 28, 2019 at 8:09 PM Lorenzo Bianconi
 wrote:
>
> Introduce the possibility to specify a RDNSS option to Router
> Advertisement packets. DNS IPv6 address can be specified using
> 'rdnss' tag in the ipv6_ra_configs column of logical router
> port table
>
> Acked-by: Mark Michelson 
> Signed-off-by: Lorenzo Bianconi 

Thanks. I applied this patch to master.

Numan

> ---
>  controller/pinctrl.c | 39 +++
>  lib/ovn-l7.h | 11 +++
>  northd/ovn-northd.c  |  5 +
>  ovn-nb.xml   |  5 +
>  tests/ovn.at | 27 +++
>  5 files changed, 79 insertions(+), 8 deletions(-)
>
> diff --git a/controller/pinctrl.c b/controller/pinctrl.c
> index d826da186..f105ebb5c 100644
> --- a/controller/pinctrl.c
> +++ b/controller/pinctrl.c
> @@ -2192,6 +2192,8 @@ struct ipv6_ra_config {
>  uint8_t mo_flags; /* Managed/Other flags for RAs */
>  uint8_t la_flags; /* On-link/autonomous flags for address prefixes */
>  struct lport_addresses prefixes;
> +struct in6_addr rdnss;
> +bool has_rdnss;
>  };
>
>  struct ipv6_ra_state {
> @@ -2289,6 +2291,12 @@ ipv6_ra_update_config(const struct sbrec_port_binding 
> *pb)
>  VLOG_WARN("Invalid IP source %s", ip_addr);
>  goto fail;
>  }
> +const char *rdnss = smap_get(&pb->options, "ipv6_ra_rdnss");
> +if (rdnss && !ipv6_parse(rdnss, &config->rdnss)) {
> +VLOG_WARN("Invalid RDNSS source %s", rdnss);
> +goto fail;
> +}
> +config->has_rdnss = !!rdnss;
>
>  return config;
>
> @@ -2319,6 +2327,33 @@ put_load(uint64_t value, enum mf_field_id dst, int 
> ofs, int n_bits,
>  bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits);
>  }
>
> +static void
> +packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num,
> +ovs_be32 lifetime, const struct in6_addr *dns)
> +{
> +size_t prev_l4_size = dp_packet_l4_size(b);
> +struct ip6_hdr *nh = dp_packet_l3(b);
> +size_t len = 2 * num + 1;
> +
> +nh->ip6_plen = htons(prev_l4_size + len * 8);
> +
> +struct nd_rdnss_opt *nd_rdnss = dp_packet_put_uninit(b, sizeof 
> *nd_rdnss);
> +nd_rdnss->type = ND_OPT_RDNSS;
> +nd_rdnss->len = len;
> +nd_rdnss->reserved = 0;
> +put_16aligned_be32(&nd_rdnss->lifetime, lifetime);
> +
> +for (int i = 0; i < num; i++) {
> +dp_packet_put(b, &dns[i], sizeof(ovs_be32[4]));
> +}
> +
> +struct ovs_ra_msg *ra = dp_packet_l4(b);
> +ra->icmph.icmp6_cksum = 0;
> +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b));
> +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra,
> +  prev_l4_size + len * 
> 8));
> +}
> +
>  /* Called with in the pinctrl_handler thread context. */
>  static long long int
>  ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra)
> @@ -2343,6 +2378,10 @@ ipv6_ra_send(struct rconn *swconn, struct 
> ipv6_ra_state *ra)
>  ra->config->la_flags, 
> htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME),
>  htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr);
>  }
> +if (ra->config->has_rdnss) {
> +packet_put_ra_rdnss_opt(&packet, 1, htonl(0x),
> +&ra->config->rdnss);
> +}
>
>  uint64_t ofpacts_stub[4096 / 8];
>  struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub);
> diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h
> index c93def450..12cee6329 100644
> --- a/lib/ovn-l7.h
> +++ b/lib/ovn-l7.h
> @@ -220,6 +220,17 @@ struct dhcpv6_opt_ia_na {
>  ovs_be32 t2;
>  });
>
> +/* RDNSS option RFC 6106 */
> +#define ND_RDNSS_OPT_LEN8
> +#define ND_OPT_RDNSS25
> +struct nd_rdnss_opt {
> +uint8_t type; /* ND_OPT_RDNSS. */
> +uint8_t len;  /* >= 3. */
> +ovs_be16 reserved;/* Always 0. */
> +ovs_16aligned_be32 lifetime;
> +};
> +BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt));
> +
>  #define DHCPV6_DUID_LL  3
>  #define DHCPV6_HW_TYPE_ETH  1
>
> diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
> index ea8ad7c2d..d1de36e08 100644
> --- a/northd/ovn-northd.c
> +++ b/northd/ovn-northd.c
> @@ -6485,6 +6485,11 @@ copy_ra_to_sb(struct ovn_port *op, const char 
> *address_mode)
>  smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s));
>  ds_destroy(&s);
>
> +const char *rdnss = smap_get(&op->nbrp->ipv6_ra_configs, "rdnss");
> +if (rdnss) {
> +smap_add(&options, "ipv6_ra_rdnss", rdnss);
> +}
> +
>  smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s);
>
>  sbrec_port_binding_set_options(op->sb, &options);
> diff --git a/ovn-nb.xml b/ovn-nb.xml
> index 1504f8fca..2faf9390b 100644
> --- a/ovn-nb.xml
> +++ b/ovn-nb.xml
> @@ -1885,6 +1885,11 @@
>  is one-third of ,
>  i.e. 200 seconds if that key is unset.
>
> +
> +  
> +IPv6 address of RDNS