Re: [ovs-dev] [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.
Hi All, Please review the update patch. Changes since v10 : Corrected the spelling mistake in the commit message. Thanks & Regards, Sriram. -Original Message- From: Sriram Vatala Sent: 29 October 2019 20:20 To: ovs-dev@openvswitch.org; ktray...@redhat.com; i.maxim...@ovn.org Cc: Ilya Maximets ; Sriram Vatala Subject: [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats. From: Ilya Maximets This is yet another refactoring for upcoming detailed drop stats. It allows to use single function for all the software calculated statistics in netdev-dpdk for both vhost and ETH ports. UINT64_MAX used as a marker for non-supported statistics in a same way as it's done in bridge.c for common netdev stats. Co-authored-by: Sriram Vatala Cc: Sriram Vatala Signed-off-by: Ilya Maximets Signed-off-by: Sriram Vatala --- lib/netdev-dpdk.c | 69 +++ 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 04e1a2d1b..2cc2516a9 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -474,6 +474,8 @@ struct netdev_rxq_dpdk { static void netdev_dpdk_destruct(struct netdev *netdev); static void netdev_dpdk_vhost_destruct(struct netdev *netdev); +static int netdev_dpdk_get_sw_custom_stats(const struct netdev *, + struct netdev_custom_stats +*); static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev); int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); @@ -1174,7 +1176,7 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, dev->rte_xstats_ids = NULL; dev->rte_xstats_ids_size = 0; -dev->tx_retries = 0; +dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; return 0; } @@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, uint32_t i; struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); -int rte_xstats_ret; +int rte_xstats_ret, sw_stats_size; + +netdev_dpdk_get_sw_custom_stats(netdev, custom_stats); ovs_mutex_lock(&dev->mutex); @@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, if (rte_xstats_ret > 0 && rte_xstats_ret <= dev->rte_xstats_ids_size) { -custom_stats->size = rte_xstats_ret; -custom_stats->counters = -(struct netdev_custom_counter *) xcalloc(rte_xstats_ret, -sizeof(struct netdev_custom_counter)); +sw_stats_size = custom_stats->size; +custom_stats->size += rte_xstats_ret; +custom_stats->counters = xrealloc(custom_stats->counters, + custom_stats->size * + sizeof + *custom_stats->counters); for (i = 0; i < rte_xstats_ret; i++) { -ovs_strlcpy(custom_stats->counters[i].name, +ovs_strlcpy(custom_stats->counters[sw_stats_size + + i].name, netdev_dpdk_get_xstat_name(dev, dev->rte_xstats_ids[i]), NETDEV_CUSTOM_STATS_NAME_SIZE); -custom_stats->counters[i].value = values[i]; +custom_stats->counters[sw_stats_size + i].value = + values[i]; } } else { VLOG_WARN("Cannot get XSTATS values for port: "DPDK_PORT_ID_FMT, dev->port_id); -custom_stats->counters = NULL; -custom_stats->size = 0; /* Let's clear statistics cache, so it will be * reconfigured */ netdev_dpdk_clear_xstats(dev); @@ -2821,39 +2824,47 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, } static int -netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev, - struct netdev_custom_stats *custom_stats) +netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev, +struct netdev_custom_stats +*custom_stats) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); -int i; +int i, n; -#define VHOST_CSTATS \ -VHOST_CSTAT(tx_retries) +#define SW_CSTATS \ +SW_CSTAT(tx_retries) -#define VHOST_CSTAT(NAME) + 1 -custom_stats->size = VHOST_CSTATS; -#undef VHOST_CSTAT +#define SW_CSTAT(NAME) + 1 +custom_stats->size = SW_CSTATS; +#undef SW_CSTAT custom_stats->counters = xcalloc(custom_stats->size, sizeof *custom_stats->counters); -i = 0; -#define VHOST_CSTAT(NAME) \ -ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \ -NETDEV_CUSTOM_STATS_NAME_SIZE); -VHOST_CSTATS; -#undef VHOST_CSTAT ovs_mutex_lock(&dev->mutex); rte_spinlock_lock(&dev->stats_lock); i = 0; -#define VHOST_CSTAT(NAME) \ +#define SW_CSTAT(NAME) \ custom_stats->counters[i++].value = dev->NAME; -VHOS
[ovs-dev] [PATCH v11 3/3] ovs-bugtool: Script to collect the port statistics
Sometimes, analysing the drop statistics of the ports will be helpful in debugging. This patch adds script to collect all supported port stats which also includes the drop counters in userspace datapath. The output of this script is included in the bugtool output. Signed-off-by: Sriram Vatala --- utilities/bugtool/automake.mk | 3 ++- utilities/bugtool/ovs-bugtool-get-port-stats | 15 +++ .../plugins/network-status/openvswitch.xml| 1 + 3 files changed, 18 insertions(+), 1 deletion(-) create mode 100755 utilities/bugtool/ovs-bugtool-get-port-stats diff --git a/utilities/bugtool/automake.mk b/utilities/bugtool/automake.mk index 4c85b9cba..0a9b93088 100644 --- a/utilities/bugtool/automake.mk +++ b/utilities/bugtool/automake.mk @@ -21,7 +21,8 @@ bugtool_scripts = \ utilities/bugtool/ovs-bugtool-ovs-bridge-datapath-type \ utilities/bugtool/ovs-bugtool-ovs-vswitchd-threads-affinity \ utilities/bugtool/ovs-bugtool-qos-configs \ - utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa + utilities/bugtool/ovs-bugtool-get-dpdk-nic-numa \ + utilities/bugtool/ovs-bugtool-get-port-stats scripts_SCRIPTS += $(bugtool_scripts) diff --git a/utilities/bugtool/ovs-bugtool-get-port-stats b/utilities/bugtool/ovs-bugtool-get-port-stats new file mode 100755 index 0..23e61034e --- /dev/null +++ b/utilities/bugtool/ovs-bugtool-get-port-stats @@ -0,0 +1,15 @@ +#! /bin/bash + +#Iterate through each port of every bridge and print +#the port statistics + +for bridge in `ovs-vsctl -- --real list-br` +do +echo "${bridge} : " +echo " ${bridge} : `ovs-vsctl get interface ${bridge} statistics`" +for iface in `ovs-vsctl list-ifaces ${bridge}` +do +echo " ${iface} : `ovs-vsctl get interface ${iface} statistics`" +done +echo -e "\n" +done diff --git a/utilities/bugtool/plugins/network-status/openvswitch.xml b/utilities/bugtool/plugins/network-status/openvswitch.xml index b0e7a1510..72aa44930 100644 --- a/utilities/bugtool/plugins/network-status/openvswitch.xml +++ b/utilities/bugtool/plugins/network-status/openvswitch.xml @@ -41,4 +41,5 @@ /usr/share/openvswitch/scripts/ovs-bugtool-ovs-ofctl-loop-over-bridges "dump-group-stats" /usr/share/openvswitch/scripts/ovs-bugtool-get-dpdk-nic-numa ip -s -s link show +/usr/share/openvswitch/scripts/ovs-bugtool-get-port-stats -- 2.20.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v11 2/3] netdev-dpdk : Detailed packet drop statistics
OVS may be unable to transmit packets for multiple reasons on the userspace datapath and today there is a single counter to track packets dropped due to any of those reasons. This patch adds custom software stats for the different reasons packets may be dropped during tx/rx on the userspace datapath in OVS. - MTU drops : drops that occur due to a too large packet size - Qos drops : drops that occur due to egress/ingress QOS - Tx failures: drops as returned by the DPDK PMD send function Note that the reason for tx failures is not specified in OVS. In practice for vhost ports it is most common that tx failures are because there are not enough available descriptors, which is usually caused by misconfiguration of the guest queues and/or because the guest is not consuming packets fast enough from the queues. These counters are displayed along with other stats in "ovs-vsctl get interface statistics" command and are available for dpdk and vhostuser/vhostuserclient ports. Also the existing "tx_retries" counter for vhost ports has been renamed to "ovs_tx_retries", so that all the custom statistics that OVS accumulates itself will have the prefix "ovs_". This will prevent any custom stats names overlapping with driver/HW stats. Signed-off-by: Sriram Vatala --- Documentation/topics/dpdk/bridge.rst | 6 ++ Documentation/topics/dpdk/vhost-user.rst | 2 +- lib/netdev-dpdk.c| 82 +++- 3 files changed, 72 insertions(+), 18 deletions(-) diff --git a/Documentation/topics/dpdk/bridge.rst b/Documentation/topics/dpdk/bridge.rst index d9bc7eba4..f0ef42ecc 100644 --- a/Documentation/topics/dpdk/bridge.rst +++ b/Documentation/topics/dpdk/bridge.rst @@ -75,6 +75,12 @@ OpenFlow14`` option:: $ ovs-ofctl -O OpenFlow14 dump-ports br0 +There are custom statistics that OVS accumulates itself and these stats has +``ovs_`` as prefix. These custom stats are shown along with other stats +using the following command:: + +$ ovs-vsctl get Interface statistics + EMC Insertion Probability - diff --git a/Documentation/topics/dpdk/vhost-user.rst b/Documentation/topics/dpdk/vhost-user.rst index cda5b122f..ec0caeb16 100644 --- a/Documentation/topics/dpdk/vhost-user.rst +++ b/Documentation/topics/dpdk/vhost-user.rst @@ -551,7 +551,7 @@ processing packets at the required rate. The amount of Tx retries on a vhost-user or vhost-user-client interface can be shown with:: - $ ovs-vsctl get Interface dpdkvhostclient0 statistics:tx_retries + $ ovs-vsctl get Interface dpdkvhostclient0 statistics:ovs_tx_retries vhost-user Dequeue Zero Copy (experimental) --- diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 2cc2516a9..6922e61ca 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -174,6 +174,20 @@ static const struct vhost_device_ops virtio_net_device_ops = .destroy_connection = destroy_connection, }; +/* Custom software stats for dpdk ports */ +struct netdev_dpdk_sw_stats { +/* No. of retries when unable to transmit. */ +uint64_t tx_retries; +/* Packet drops when unable to transmit; Probably Tx queue is full. */ +uint64_t tx_failure_drops; +/* Packet length greater than device MTU. */ +uint64_t tx_mtu_exceeded_drops; +/* Packet drops in egress policer processing. */ +uint64_t tx_qos_drops; +/* Packet drops in ingress policer processing. */ +uint64_t rx_qos_drops; +}; + enum { DPDK_RING_SIZE = 256 }; BUILD_ASSERT_DECL(IS_POW2(DPDK_RING_SIZE)); enum { DRAIN_TSC = 20ULL }; @@ -416,11 +430,10 @@ struct netdev_dpdk { PADDED_MEMBERS(CACHE_LINE_SIZE, struct netdev_stats stats; -/* Custom stat for retries when unable to transmit. */ -uint64_t tx_retries; +struct netdev_dpdk_sw_stats *sw_stats; /* Protects stats */ rte_spinlock_t stats_lock; -/* 4 pad bytes here. */ +/* 36 pad bytes here. */ ); PADDED_MEMBERS(CACHE_LINE_SIZE, @@ -1176,7 +1189,8 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, dev->rte_xstats_ids = NULL; dev->rte_xstats_ids_size = 0; -dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; +dev->sw_stats = xzalloc(sizeof *dev->sw_stats); +dev->sw_stats->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; return 0; } @@ -1362,6 +1376,7 @@ common_destruct(struct netdev_dpdk *dev) ovs_list_remove(&dev->list_node); free(ovsrcu_get_protected(struct ingress_policer *, &dev->ingress_policer)); +free(dev->sw_stats); ovs_mutex_destroy(&dev->mutex); } @@ -2212,6 +2227,7 @@ netdev_dpdk_vhost_rxq_recv(struct netdev_rxq *rxq, rte_spinlock_lock(&dev->stats_lock); netdev_dpdk_vhost_update_rx_counters(&dev->stats, batch->packets, nb_rx, dropped); +dev->sw_stats->rx_qos_drops += dropped; rt
[ovs-dev] [PATCH v11 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.
From: Ilya Maximets This is yet another refactoring for upcoming detailed drop stats. It allows to use single function for all the software calculated statistics in netdev-dpdk for both vhost and ETH ports. UINT64_MAX used as a marker for non-supported statistics in a same way as it's done in bridge.c for common netdev stats. Co-authored-by: Sriram Vatala Cc: Sriram Vatala Signed-off-by: Ilya Maximets Signed-off-by: Sriram Vatala --- lib/netdev-dpdk.c | 69 +++ 1 file changed, 40 insertions(+), 29 deletions(-) diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index 04e1a2d1b..2cc2516a9 100644 --- a/lib/netdev-dpdk.c +++ b/lib/netdev-dpdk.c @@ -474,6 +474,8 @@ struct netdev_rxq_dpdk { static void netdev_dpdk_destruct(struct netdev *netdev); static void netdev_dpdk_vhost_destruct(struct netdev *netdev); +static int netdev_dpdk_get_sw_custom_stats(const struct netdev *, + struct netdev_custom_stats *); static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev); int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); @@ -1174,7 +1176,7 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, dev->rte_xstats_ids = NULL; dev->rte_xstats_ids_size = 0; -dev->tx_retries = 0; +dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; return 0; } @@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, uint32_t i; struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); -int rte_xstats_ret; +int rte_xstats_ret, sw_stats_size; + +netdev_dpdk_get_sw_custom_stats(netdev, custom_stats); ovs_mutex_lock(&dev->mutex); @@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, if (rte_xstats_ret > 0 && rte_xstats_ret <= dev->rte_xstats_ids_size) { -custom_stats->size = rte_xstats_ret; -custom_stats->counters = -(struct netdev_custom_counter *) xcalloc(rte_xstats_ret, -sizeof(struct netdev_custom_counter)); +sw_stats_size = custom_stats->size; +custom_stats->size += rte_xstats_ret; +custom_stats->counters = xrealloc(custom_stats->counters, + custom_stats->size * + sizeof *custom_stats->counters); for (i = 0; i < rte_xstats_ret; i++) { -ovs_strlcpy(custom_stats->counters[i].name, +ovs_strlcpy(custom_stats->counters[sw_stats_size + i].name, netdev_dpdk_get_xstat_name(dev, dev->rte_xstats_ids[i]), NETDEV_CUSTOM_STATS_NAME_SIZE); -custom_stats->counters[i].value = values[i]; +custom_stats->counters[sw_stats_size + i].value = values[i]; } } else { VLOG_WARN("Cannot get XSTATS values for port: "DPDK_PORT_ID_FMT, dev->port_id); -custom_stats->counters = NULL; -custom_stats->size = 0; /* Let's clear statistics cache, so it will be * reconfigured */ netdev_dpdk_clear_xstats(dev); @@ -2821,39 +2824,47 @@ netdev_dpdk_get_custom_stats(const struct netdev *netdev, } static int -netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev, - struct netdev_custom_stats *custom_stats) +netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev, +struct netdev_custom_stats *custom_stats) { struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); -int i; +int i, n; -#define VHOST_CSTATS \ -VHOST_CSTAT(tx_retries) +#define SW_CSTATS \ +SW_CSTAT(tx_retries) -#define VHOST_CSTAT(NAME) + 1 -custom_stats->size = VHOST_CSTATS; -#undef VHOST_CSTAT +#define SW_CSTAT(NAME) + 1 +custom_stats->size = SW_CSTATS; +#undef SW_CSTAT custom_stats->counters = xcalloc(custom_stats->size, sizeof *custom_stats->counters); -i = 0; -#define VHOST_CSTAT(NAME) \ -ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \ -NETDEV_CUSTOM_STATS_NAME_SIZE); -VHOST_CSTATS; -#undef VHOST_CSTAT ovs_mutex_lock(&dev->mutex); rte_spinlock_lock(&dev->stats_lock); i = 0; -#define VHOST_CSTAT(NAME) \ +#define SW_CSTAT(NAME) \ custom_stats->counters[i++].value = dev->NAME; -VHOST_CSTATS; -#undef VHOST_CSTAT +SW_CSTATS; +#undef SW_CSTAT rte_spinlock_unlock(&dev->stats_lock); ovs_mutex_unlock(&dev->mutex); +i = 0; +n = 0; +#define SW_CSTAT(NAME) \ +if (custom_stats->counters[i].value != UINT64_MAX) { \ +ovs_strlcpy(custom_stats->counters[n].name, #NAME, \ +
Re: [ovs-dev] [PATCH v10 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats.
Thanks Kevin for reviewing and acknowledging the patch. I will correct the spelling and send the updated patch. Thanks & Regards, Sriram. -Original Message- From: Kevin Traynor Sent: 25 October 2019 20:36 To: Sriram Vatala ; ovs-dev@openvswitch.org; i.maxim...@ovn.org Cc: Ilya Maximets Subject: Re: [PATCH v10 1/3] netdev-dpdk: Reuse vhost function for dpdk ETH custom stats. On 24/10/2019 19:21, Sriram Vatala wrote: > From: Ilya Maximets > > This is yet another refactoring for upcoming detailed drop stats. > It allowes to use single function for all the software calculated s/allowes/allows > statistics in netdev-dpdk for both vhost and ETH ports. > > UINT64_MAX used as a marker for non-supported statistics in a same way > as it's done in bridge.c for common netdev stats. > > Co-authored-by: Sriram Vatala > Cc: Sriram Vatala > Signed-off-by: Ilya Maximets > Signed-off-by: Sriram Vatala Acked-by: Kevin Traynor > --- > lib/netdev-dpdk.c | 69 > +++ > 1 file changed, 40 insertions(+), 29 deletions(-) > > diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c index > 04e1a2d1b..2cc2516a9 100644 > --- a/lib/netdev-dpdk.c > +++ b/lib/netdev-dpdk.c > @@ -474,6 +474,8 @@ struct netdev_rxq_dpdk { static void > netdev_dpdk_destruct(struct netdev *netdev); static void > netdev_dpdk_vhost_destruct(struct netdev *netdev); > > +static int netdev_dpdk_get_sw_custom_stats(const struct netdev *, > + struct netdev_custom_stats > +*); > static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev); > > int netdev_dpdk_get_vid(const struct netdev_dpdk *dev); @@ -1174,7 > +1176,7 @@ common_construct(struct netdev *netdev, dpdk_port_t port_no, > dev->rte_xstats_ids = NULL; > dev->rte_xstats_ids_size = 0; > > -dev->tx_retries = 0; > +dev->tx_retries = (dev->type == DPDK_DEV_VHOST) ? 0 : UINT64_MAX; > > return 0; > } > @@ -2775,7 +2777,9 @@ netdev_dpdk_get_custom_stats(const struct netdev > *netdev, > > uint32_t i; > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > -int rte_xstats_ret; > +int rte_xstats_ret, sw_stats_size; > + > +netdev_dpdk_get_sw_custom_stats(netdev, custom_stats); > > ovs_mutex_lock(&dev->mutex); > > @@ -2790,23 +2794,22 @@ netdev_dpdk_get_custom_stats(const struct netdev > *netdev, > if (rte_xstats_ret > 0 && > rte_xstats_ret <= dev->rte_xstats_ids_size) { > > -custom_stats->size = rte_xstats_ret; > -custom_stats->counters = > -(struct netdev_custom_counter *) > xcalloc(rte_xstats_ret, > -sizeof(struct netdev_custom_counter)); > +sw_stats_size = custom_stats->size; > +custom_stats->size += rte_xstats_ret; > +custom_stats->counters = xrealloc(custom_stats->counters, > + custom_stats->size * > + sizeof > + *custom_stats->counters); > > for (i = 0; i < rte_xstats_ret; i++) { > -ovs_strlcpy(custom_stats->counters[i].name, > +ovs_strlcpy(custom_stats->counters[sw_stats_size + > + i].name, > netdev_dpdk_get_xstat_name(dev, > > dev->rte_xstats_ids[i]), > NETDEV_CUSTOM_STATS_NAME_SIZE); > -custom_stats->counters[i].value = values[i]; > +custom_stats->counters[sw_stats_size + i].value = > + values[i]; > } > } else { > VLOG_WARN("Cannot get XSTATS values for port: > "DPDK_PORT_ID_FMT, >dev->port_id); > -custom_stats->counters = NULL; > -custom_stats->size = 0; > /* Let's clear statistics cache, so it will be > * reconfigured */ > netdev_dpdk_clear_xstats(dev); @@ -2821,39 +2824,47 @@ > netdev_dpdk_get_custom_stats(const struct netdev *netdev, } > > static int > -netdev_dpdk_vhost_get_custom_stats(const struct netdev *netdev, > - struct netdev_custom_stats > *custom_stats) > +netdev_dpdk_get_sw_custom_stats(const struct netdev *netdev, > +struct netdev_custom_stats > +*custom_stats) > { > struct netdev_dpdk *dev = netdev_dpdk_cast(netdev); > -int i; > +int i, n; > > -#define VHOST_CSTATS \ > -VHOST_CSTAT(tx_retries) > +#define SW_CSTATS \ > +SW_CSTAT(tx_retries) > > -#define VHOST_CSTAT(NAME) + 1 > -custom_stats->size = VHOST_CSTATS; > -#undef VHOST_CSTAT > +#define SW_CSTAT(NAME) + 1 > +custom_stats->size = SW_CSTATS; > +#undef SW_CSTAT > custom_stats->counters = xcalloc(custom_stats->size, > sizeof *custom_stats->counters); > -i = 0; > -#define VHOST_CSTAT(NAME) \ > -ovs_strlcpy(custom_stats->counters[i++].name, #NAME, \ > -
[ovs-dev] Use more than 200000 flows in datapath/kernel cache flows
Sending again with a meaningful subject:) Dear ovs-dev, I'm trying to see if I can increase the number of flows supported in kernel cache. I found two relevant config options (other_config:flow-limit and upcall/flow limit). My understanding was that other_config:flow-limit is for the normal openflow flows while upcall is for the kernel flows (http://docs.openvswitch.org/en/latest/faq/design/ question one). It seems I can change the limit for Openflow flows. This works: $ovs-vsctl --no-wait set Open_vSwitch . other_config:flow-limit=21 $ovs-vsctl --no-wait get Open_vSwitch . other_config:flow-limit "21" However I cannot change the limit for kernel flows. The set-flow-limit command made a confirmation but the show command still shows *(limit 20)* $ovs-appctl upcall/set-flow-limit 21 set flow_limit to 21 $ovs-appctl upcall/show system@ovs-system: flows : (current 70) (avg 68) (max 92) (limit 20) dump duration : 3ms ufid enabled : true 127: (keys 3) ... My question is, is it possible to change the limit on kernel flows and is upcall the right command? Thanks, Jerry ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH net-next v5 00/10] optimize openvswitch flow looking up
Date: Sat, 19 Oct 2019 16:08:34 +0800 The date on your postings is in the past. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] (no subject)
Dear ovs-dev, I'm trying to see if I can increase the number of flows supported in kernel cache. I found two relevant config options (other_config:flow-limit and upcall/flow limit). My understanding was that other_config:flow-limit is for the normal openflow flows while upcall is for the kernel flows (http://docs.openvswitch.org/en/latest/faq/design/ question one). It seems I can change the limit for Openflow flows. This works: $ovs-vsctl --no-wait set Open_vSwitch . other_config:flow-limit=21 $ovs-vsctl --no-wait get Open_vSwitch . other_config:flow-limit "21" However I cannot change the limit for kernel flows. The set-flow-limit command made a confirmation but the show command still shows *(limit 20)* $ovs-appctl upcall/set-flow-limit 21 set flow_limit to 21 $ovs-appctl upcall/show system@ovs-system: flows : (current 70) (avg 68) (max 92) (limit 20) dump duration : 3ms ufid enabled : true 127: (keys 3) ... My question is, is it possible to change the limit on kernel flows and is upcall the right command? Thanks, Jerry ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 10/10] net: openvswitch: simplify the ovs_dp_cmd_new
From: Tonghao Zhang use the specified functions to init resource. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose --- net/openvswitch/datapath.c | 60 +- 1 file changed, 38 insertions(+), 22 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index aeb76e4..4d48e48 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1576,6 +1576,31 @@ static int ovs_dp_change(struct datapath *dp, struct nlattr *a[]) return 0; } +static int ovs_dp_stats_init(struct datapath *dp) +{ + dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); + if (!dp->stats_percpu) + return -ENOMEM; + + return 0; +} + +static int ovs_dp_vport_init(struct datapath *dp) +{ + int i; + + dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!dp->ports) + return -ENOMEM; + + for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->ports[i]); + + return 0; +} + static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; @@ -1584,7 +1609,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct vport *vport; struct ovs_net *ovs_net; - int err, i; + int err; err = -EINVAL; if (!a[OVS_DP_ATTR_NAME] || !a[OVS_DP_ATTR_UPCALL_PID]) @@ -1597,35 +1622,26 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err = -ENOMEM; dp = kzalloc(sizeof(*dp), GFP_KERNEL); if (dp == NULL) - goto err_free_reply; + goto err_destroy_reply; ovs_dp_set_net(dp, sock_net(skb->sk)); /* Allocate table. */ err = ovs_flow_tbl_init(&dp->table); if (err) - goto err_free_dp; + goto err_destroy_dp; - dp->stats_percpu = netdev_alloc_pcpu_stats(struct dp_stats_percpu); - if (!dp->stats_percpu) { - err = -ENOMEM; + err = ovs_dp_stats_init(dp); + if (err) goto err_destroy_table; - } - dp->ports = kmalloc_array(DP_VPORT_HASH_BUCKETS, - sizeof(struct hlist_head), - GFP_KERNEL); - if (!dp->ports) { - err = -ENOMEM; - goto err_destroy_percpu; - } - - for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) - INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_dp_vport_init(dp); + if (err) + goto err_destroy_stats; err = ovs_meters_init(dp); if (err) - goto err_destroy_ports_array; + goto err_destroy_ports; /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); @@ -1675,15 +1691,15 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) err_destroy_meters: ovs_meters_exit(dp); -err_destroy_ports_array: +err_destroy_ports: kfree(dp->ports); -err_destroy_percpu: +err_destroy_stats: free_percpu(dp->stats_percpu); err_destroy_table: ovs_flow_tbl_destroy(&dp->table); -err_free_dp: +err_destroy_dp: kfree(dp); -err_free_reply: +err_destroy_reply: kfree_skb(reply); err: return err; -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 08/10] net: openvswitch: fix possible memleak on destroy flow-table
From: Tonghao Zhang When we destroy the flow tables which may contain the flow_mask, so release the flow mask struct. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose --- net/openvswitch/flow_table.c | 185 +++ 1 file changed, 97 insertions(+), 88 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 5df5182..a128a7f 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -210,6 +210,74 @@ static int tbl_mask_array_realloc(struct flow_table *tbl, int size) return 0; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_realloc(tbl, ma->max + + MASK_ARRAY_SIZE_MIN); + if (err) + return err; + + ma = ovsl_dereference(tbl->mask_array); + } + + BUG_ON(ovsl_dereference(ma->masks[ma_count])); + + rcu_assign_pointer(ma->masks[ma_count], new); + WRITE_ONCE(ma->count, ma_count +1); + + return 0; +} + +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); + + /* Remove the deleted mask pointers from the array */ + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; + } + + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); +} + +/* Remove 'mask' from the mask list, if it is not needed any more. */ +static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) +{ + if (mask) { + /* ovs-lock is required to protect mask-refcount and +* mask list. +*/ + ASSERT_OVSL(); + BUG_ON(!mask->ref_count); + mask->ref_count--; + + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); + } +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; @@ -257,7 +325,28 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, +static void table_instance_flow_free(struct flow_table *table, + struct table_instance *ti, + struct table_instance *ufid_ti, + struct sw_flow *flow, + bool count) +{ + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); + if (count) + table->count--; + + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + + if (count) + table->ufid_count--; + } + + flow_mask_remove(table, flow->mask); +} + +static void table_instance_destroy(struct flow_table *table, + struct table_instance *ti, struct table_instance *ufid_ti, bool deferred) { @@ -274,13 +363,11 @@ static void table_instance_destroy(struct table_instance *ti, struct sw_flow *flow; struct hlist_head *head = &ti->buckets[i]; struct hlist_node *n; - int ver = ti->node_ver; - int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { - hlist_del_rcu(&flow->flow_table.node[ver]); - if (ovs_identifier_is_ufid(&flow->id)) - hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); + hlist_for_each_entry_safe(flow, n, head, + flow_table.node[ti->node_ver]) { + + table_instance_flow_free(table, ti, ufid_ti, flow, false); ovs_flow_free(flow, deferred); } } @@ -305,7 +392,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) free_percpu(table->mask_cache); kfree_rcu(rcu_dereference_raw(table->m
[ovs-dev] [PATCH net-next v5 09/10] net: openvswitch: don't unlock mutex when changing the user_features fails
From: Tonghao Zhang Unlocking of a not locked mutex is not allowed. Other kernel thread may be in critical section while we unlock it because of setting user_feature fail. Fixes: 95a7233c4 ("net: openvswitch: Set OvS recirc_id from tc chain index") Cc: Paul Blakey Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/datapath.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 9fea7e1..aeb76e4 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -1657,6 +1657,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } + ovs_unlock(); goto err_destroy_meters; } @@ -1673,7 +1674,6 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) return 0; err_destroy_meters: - ovs_unlock(); ovs_meters_exit(dp); err_destroy_ports_array: kfree(dp->ports); -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 06/10] net: openvswitch: simplify the flow_hash
From: Tonghao Zhang Simplify the code and remove the unnecessary BUILD_BUG_ON. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/flow_table.c | 8 ++-- 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index a10d421..3e3d345 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -432,13 +432,9 @@ int ovs_flow_tbl_flush(struct flow_table *flow_table) static u32 flow_hash(const struct sw_flow_key *key, const struct sw_flow_key_range *range) { - int key_start = range->start; - int key_end = range->end; - const u32 *hash_key = (const u32 *)((const u8 *)key + key_start); - int hash_u32s = (key_end - key_start) >> 2; - + const u32 *hash_key = (const u32 *)((const u8 *)key + range->start); /* Make sure number of hash bytes are multiple of u32. */ - BUILD_BUG_ON(sizeof(long) % sizeof(u32)); + int hash_u32s = range_n_bytes(range) >> 2; return jhash2(hash_key, hash_u32s, 0); } -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 07/10] net: openvswitch: add likely in flow_lookup
From: Tonghao Zhang The most case *index < ma->max, and flow-mask is not NULL. We add un/likely for performance. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/flow_table.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3e3d345..5df5182 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -518,7 +518,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct sw_flow_mask *mask; int i; - if (*index < ma->max) { + if (likely(*index < ma->max)) { mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); @@ -533,7 +533,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, continue; mask = rcu_dereference_ovsl(ma->masks[i]); - if (!mask) + if (unlikely(!mask)) break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 04/10] net: openvswitch: optimize flow mask cache hash collision
From: Tonghao Zhang Port the codes to linux upstream and with little changes. Pravin B Shelar, says: | In case hash collision on mask cache, OVS does extra flow | lookup. Following patch avoid it. Link: https://github.com/openvswitch/ovs/commit/0e6efbe2712da03522532dc5e84806a96f6a0dd1 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose --- net/openvswitch/flow_table.c | 95 1 file changed, 53 insertions(+), 42 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 237cf85..8d4f50d 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -508,6 +508,9 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } +/* Flow lookup does full lookup on flow table. It starts with + * mask from index passed in *index. + */ static struct sw_flow *flow_lookup(struct flow_table *tbl, struct table_instance *ti, struct mask_array *ma, @@ -516,18 +519,31 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, u32 *index) { struct sw_flow *flow; + struct sw_flow_mask *mask; int i; - for (i = 0; i < ma->max; i++) { - struct sw_flow_mask *mask; - - mask = rcu_dereference_ovsl(ma->masks[i]); + if (*index < ma->max) { + mask = rcu_dereference_ovsl(ma->masks[*index]); if (mask) { flow = masked_flow_lookup(ti, key, mask, n_mask_hit); - if (flow) { /* Found */ - *index = i; + if (flow) return flow; - } + } + } + + for (i = 0; i < ma->max; i++) { + + if (i == *index) + continue; + + mask = rcu_dereference_ovsl(ma->masks[i]); + if (!mask) + continue; + + flow = masked_flow_lookup(ti, key, mask, n_mask_hit); + if (flow) { /* Found */ + *index = i; + return flow; } } @@ -546,58 +562,54 @@ struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, u32 skb_hash, u32 *n_mask_hit) { - struct mask_array *ma = rcu_dereference_ovsl(tbl->mask_array); - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); - struct mask_cache_entry *entries, *ce, *del; + struct mask_array *ma = rcu_dereference(tbl->mask_array); + struct table_instance *ti = rcu_dereference(tbl->ti); + struct mask_cache_entry *entries, *ce; struct sw_flow *flow; - u32 hash = skb_hash; + u32 hash; int seg; *n_mask_hit = 0; if (unlikely(!skb_hash)) { - u32 __always_unused mask_index; + u32 mask_index = 0; return flow_lookup(tbl, ti, ma, key, n_mask_hit, &mask_index); } - del = NULL; + /* Pre and post recirulation flows usually have the same skb_hash +* value. To avoid hash collisions, rehash the 'skb_hash' with +* 'recirc_id'. */ + if (key->recirc_id) + skb_hash = jhash_1word(skb_hash, key->recirc_id); + + ce = NULL; + hash = skb_hash; entries = this_cpu_ptr(tbl->mask_cache); + /* Find the cache entry 'ce' to operate on. */ for (seg = 0; seg < MC_HASH_SEGS; seg++) { - int index; - - index = hash & (MC_HASH_ENTRIES - 1); - ce = &entries[index]; - - if (ce->skb_hash == skb_hash) { - struct sw_flow_mask *mask; - struct sw_flow *flow; - - mask = rcu_dereference_ovsl(ma->masks[ce->mask_index]); - if (mask) { - flow = masked_flow_lookup(ti, key, mask, - n_mask_hit); - if (flow) /* Found */ - return flow; - } - - del = ce; - break; + int index = hash & (MC_HASH_ENTRIES - 1); + struct mask_cache_entry *e; + + e = &entries[index]; + if (e->skb_hash == skb_hash) { + flow = flow_lookup(tbl, ti, ma, key, n_mask_hit, + &e->mask_index); + if (!flow) + e->skb_hash = 0; + return flow; } - if (!del || (del->skb_hash && !ce->skb_hash) || - (rcu_dereference_ovsl(ma->masks[del->
[ovs-dev] [PATCH net-next v5 05/10] net: openvswitch: optimize flow-mask looking up
From: Tonghao Zhang The full looking up on flow table traverses all mask array. If mask-array is too large, the number of invalid flow-mask increase, performance will be drop. One bad case, for example: M means flow-mask is valid and NULL of flow-mask means deleted. +---+ | M | NULL | ... | NULL | M| +---+ In that case, without this patch, openvswitch will traverses all mask array, because there will be one flow-mask in the tail. This patch changes the way of flow-mask inserting and deleting, and the mask array will be keep as below: there is not a NULL hole. In the fast path, we can "break" "for" (not "continue") in flow_lookup when we get a NULL flow-mask. "break" v +---+ | M | M | NULL |... | NULL | NULL| +---+ This patch don't optimize slow or control path, still using ma->max to traverse. Slow path: * tbl_mask_array_realloc * ovs_flow_tbl_lookup_exact * flow_mask_find Signed-off-by: Tonghao Zhang Tested-by: Greg Rose --- net/openvswitch/flow_table.c | 101 ++- 1 file changed, 52 insertions(+), 49 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 8d4f50d..a10d421 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -538,7 +538,7 @@ static struct sw_flow *flow_lookup(struct flow_table *tbl, mask = rcu_dereference_ovsl(ma->masks[i]); if (!mask) - continue; + break; flow = masked_flow_lookup(ti, key, mask, n_mask_hit); if (flow) { /* Found */ @@ -695,7 +695,7 @@ struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, int ovs_flow_tbl_num_masks(const struct flow_table *table) { struct mask_array *ma = rcu_dereference_ovsl(table->mask_array); - return ma->count; + return READ_ONCE(ma->count); } static struct table_instance *table_instance_expand(struct table_instance *ti, @@ -704,21 +704,33 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } -static void tbl_mask_array_delete_mask(struct mask_array *ma, - struct sw_flow_mask *mask) +static void tbl_mask_array_del_mask(struct flow_table *tbl, + struct sw_flow_mask *mask) { - int i; + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int i, ma_count = READ_ONCE(ma->count); /* Remove the deleted mask pointers from the array */ - for (i = 0; i < ma->max; i++) { - if (mask == ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } + for (i = 0; i < ma_count; i++) { + if (mask == ovsl_dereference(ma->masks[i])) + goto found; } + BUG(); + return; + +found: + WRITE_ONCE(ma->count, ma_count -1); + + rcu_assign_pointer(ma->masks[i], ma->masks[ma_count -1]); + RCU_INIT_POINTER(ma->masks[ma_count -1], NULL); + + kfree_rcu(mask, rcu); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma_count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } /* Remove 'mask' from the mask list, if it is not needed any more. */ @@ -732,17 +744,8 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) BUG_ON(!mask->ref_count); mask->ref_count--; - if (!mask->ref_count) { - struct mask_array *ma; - - ma = ovsl_dereference(tbl->mask_array); - tbl_mask_array_delete_mask(ma, mask); - - /* Shrink the mask array if necessary. */ - if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && - ma->count <= (ma->max / 3)) - tbl_mask_array_realloc(tbl, ma->max / 2); - } + if (!mask->ref_count) + tbl_mask_array_del_mask(tbl, mask); } } @@ -806,6 +809,29 @@ static struct sw_flow_mask *flow_mask_find(const struct flow_table *tbl, return NULL; } +static int tbl_mask_array_add_mask(struct flow_table *tbl, + struct sw_flow_mask *new) +{ + struct mask_array *ma = ovsl_dereference(tbl->mask_array); + int err, ma_count = READ_ONCE(ma->count); + + if (ma_count >= ma->max) { + err = tbl_mask_array_real
[ovs-dev] [PATCH net-next v5 03/10] net: openvswitch: shrink the mask array if necessary
From: Tonghao Zhang When creating and inserting flow-mask, if there is no available flow-mask, we realloc the mask array. When removing flow-mask, if necessary, we shrink mask array. Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/flow_table.c | 33 +++-- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 0d1df53..237cf85 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -693,6 +693,23 @@ static struct table_instance *table_instance_expand(struct table_instance *ti, return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } +static void tbl_mask_array_delete_mask(struct mask_array *ma, + struct sw_flow_mask *mask) +{ + int i; + + /* Remove the deleted mask pointers from the array */ + for (i = 0; i < ma->max; i++) { + if (mask == ovsl_dereference(ma->masks[i])) { + RCU_INIT_POINTER(ma->masks[i], NULL); + ma->count--; + kfree_rcu(mask, rcu); + return; + } + } + BUG(); +} + /* Remove 'mask' from the mask list, if it is not needed any more. */ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) { @@ -706,18 +723,14 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) if (!mask->ref_count) { struct mask_array *ma; - int i; ma = ovsl_dereference(tbl->mask_array); - for (i = 0; i < ma->max; i++) { - if (mask == ovsl_dereference(ma->masks[i])) { - RCU_INIT_POINTER(ma->masks[i], NULL); - ma->count--; - kfree_rcu(mask, rcu); - return; - } - } - BUG(); + tbl_mask_array_delete_mask(ma, mask); + + /* Shrink the mask array if necessary. */ + if (ma->max >= (MASK_ARRAY_SIZE_MIN * 2) && + ma->count <= (ma->max / 3)) + tbl_mask_array_realloc(tbl, ma->max / 2); } } } -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 02/10] net: openvswitch: convert mask list in mask array
From: Tonghao Zhang Port the codes to linux upstream and with little changes. Pravin B Shelar, says: | mask caches index of mask in mask_list. On packet recv OVS | need to traverse mask-list to get cached mask. Therefore array | is better for retrieving cached mask. This also allows better | cache replacement algorithm by directly checking mask's existence. Link: https://github.com/openvswitch/ovs/commit/d49fc3ff53c65e4eca9cabd52ac63396746a7ef5 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/flow.h | 1 - net/openvswitch/flow_table.c | 210 --- net/openvswitch/flow_table.h | 8 +- 3 files changed, 167 insertions(+), 52 deletions(-) diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index b830d5f..8080518 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -166,7 +166,6 @@ struct sw_flow_key_range { struct sw_flow_mask { int ref_count; struct rcu_head rcu; - struct list_head list; struct sw_flow_key_range range; struct sw_flow_key key; }; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 3d515c0..0d1df53 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -34,6 +34,7 @@ #include #define TBL_MIN_BUCKETS1024 +#define MASK_ARRAY_SIZE_MIN16 #define REHASH_INTERVAL(10 * 60 * HZ) #define MC_HASH_SHIFT 8 @@ -168,9 +169,51 @@ static struct table_instance *table_instance_alloc(int new_size) return ti; } +static struct mask_array *tbl_mask_array_alloc(int size) +{ + struct mask_array *new; + + size = max(MASK_ARRAY_SIZE_MIN, size); + new = kzalloc(sizeof(struct mask_array) + + sizeof(struct sw_flow_mask *) * size, GFP_KERNEL); + if (!new) + return NULL; + + new->count = 0; + new->max = size; + + return new; +} + +static int tbl_mask_array_realloc(struct flow_table *tbl, int size) +{ + struct mask_array *old; + struct mask_array *new; + + new = tbl_mask_array_alloc(size); + if (!new) + return -ENOMEM; + + old = ovsl_dereference(tbl->mask_array); + if (old) { + int i; + + for (i = 0; i < old->max; i++) { + if (ovsl_dereference(old->masks[i])) + new->masks[new->count++] = old->masks[i]; + } + } + + rcu_assign_pointer(tbl->mask_array, new); + kfree_rcu(old, rcu); + + return 0; +} + int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; + struct mask_array *ma; table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * MC_HASH_ENTRIES, @@ -178,9 +221,13 @@ int ovs_flow_tbl_init(struct flow_table *table) if (!table->mask_cache) return -ENOMEM; + ma = tbl_mask_array_alloc(MASK_ARRAY_SIZE_MIN); + if (!ma) + goto free_mask_cache; + ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) - goto free_mask_cache; + goto free_mask_array; ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ufid_ti) @@ -188,7 +235,7 @@ int ovs_flow_tbl_init(struct flow_table *table) rcu_assign_pointer(table->ti, ti); rcu_assign_pointer(table->ufid_ti, ufid_ti); - INIT_LIST_HEAD(&table->mask_list); + rcu_assign_pointer(table->mask_array, ma); table->last_rehash = jiffies; table->count = 0; table->ufid_count = 0; @@ -196,6 +243,8 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); +free_mask_array: + kfree(ma); free_mask_cache: free_percpu(table->mask_cache); return -ENOMEM; @@ -255,6 +304,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); free_percpu(table->mask_cache); + kfree_rcu(rcu_dereference_raw(table->mask_array), rcu); table_instance_destroy(ti, ufid_ti, false); } @@ -460,17 +510,27 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, static struct sw_flow *flow_lookup(struct flow_table *tbl, struct table_instance *ti, + struct mask_array *ma, const struct sw_flow_key *key, - u32 *n_mask_hit) + u32 *n_mask_hit, + u32 *index) { - struct sw_flow_mask *mask; struct sw_flow *flow; + int i; - list_for_each_entry_rcu(mask, &tbl->mask_list, list) { - flow = masked_flow_lookup(ti, key, mask, n_mask_hit); -
[ovs-dev] [PATCH net-next v5 00/10] optimize openvswitch flow looking up
From: Tonghao Zhang This series patch optimize openvswitch for performance or simplify codes. Patch 1, 2, 4: Port Pravin B Shelar patches to linux upstream with little changes. Patch 5, 6, 7: Optimize the flow looking up and simplify the flow hash. Patch 8, 9: are bugfix. The performance test is on Intel Xeon E5-2630 v4. The test topology is show as below: +---+ | +---+ | | | eth0 ovs-switcheth1 | | Host0 | +---+ | +---+ ^ | | | | | | | | v +-++ ++-+ | netperf | Host1 | netserver| Host2 +--+ +--+ We use netperf send the 64B packets, and insert 255+ flow-mask: $ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:01:00:00:00:00/ff:ff:ff:ff:ff:01),eth_type(0x0800),ipv4(frag=no)" 2 ... $ ovs-dpctl add-flow ovs-switch "in_port(1),eth(dst=00:ff:00:00:00:00/ff:ff:ff:ff:ff:ff),eth_type(0x0800),ipv4(frag=no)" 2 $ $ netperf -t UDP_STREAM -H 2.2.2.200 -l 40 -- -m 18 * Without series patch, throughput 8.28Mbps * With series patch, throughput 46.05Mbps v5: rewrite patch 8, release flow-mask when freeing flow v4: access ma->count with READ_ONCE/WRITE_ONCE API. More information, see patch 5 comments. v3: update ma point when realloc mask_array in patch 5 v2: simplify codes. e.g. use kfree_rcu instead of call_rcu Tonghao Zhang (10): net: openvswitch: add flow-mask cache for performance net: openvswitch: convert mask list in mask array net: openvswitch: shrink the mask array if necessary net: openvswitch: optimize flow mask cache hash collision net: openvswitch: optimize flow-mask looking up net: openvswitch: simplify the flow_hash net: openvswitch: add likely in flow_lookup net: openvswitch: fix possible memleak on destroy flow-table net: openvswitch: don't unlock mutex when changing the user_features fails net: openvswitch: simplify the ovs_dp_cmd_new net/openvswitch/datapath.c | 65 +--- net/openvswitch/flow.h | 1 - net/openvswitch/flow_table.c | 381 ++- net/openvswitch/flow_table.h | 19 ++- 4 files changed, 360 insertions(+), 106 deletions(-) -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH net-next v5 01/10] net: openvswitch: add flow-mask cache for performance
From: Tonghao Zhang The idea of this optimization comes from a patch which is committed in 2014, openvswitch community. The author is Pravin B Shelar. In order to get high performance, I implement it again. Later patches will use it. Pravin B Shelar, says: | On every packet OVS needs to lookup flow-table with every | mask until it finds a match. The packet flow-key is first | masked with mask in the list and then the masked key is | looked up in flow-table. Therefore number of masks can | affect packet processing performance. Link: https://github.com/openvswitch/ovs/commit/5604935e4e1cbc16611d2d97f50b717aa31e8ec5 Signed-off-by: Tonghao Zhang Tested-by: Greg Rose Acked-by: William Tu --- net/openvswitch/datapath.c | 3 +- net/openvswitch/flow_table.c | 109 +-- net/openvswitch/flow_table.h | 11 - 3 files changed, 107 insertions(+), 16 deletions(-) diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index f30e406..9fea7e1 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -227,7 +227,8 @@ void ovs_dp_process_packet(struct sk_buff *skb, struct sw_flow_key *key) stats = this_cpu_ptr(dp->stats_percpu); /* Look up flow. */ - flow = ovs_flow_tbl_lookup_stats(&dp->table, key, &n_mask_hit); + flow = ovs_flow_tbl_lookup_stats(&dp->table, key, skb_get_hash(skb), +&n_mask_hit); if (unlikely(!flow)) { struct dp_upcall_info upcall; diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index cf3582c..3d515c0 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -36,6 +36,10 @@ #define TBL_MIN_BUCKETS1024 #define REHASH_INTERVAL(10 * 60 * HZ) +#define MC_HASH_SHIFT 8 +#define MC_HASH_ENTRIES(1u << MC_HASH_SHIFT) +#define MC_HASH_SEGS ((sizeof(uint32_t) * 8) / MC_HASH_SHIFT) + static struct kmem_cache *flow_cache; struct kmem_cache *flow_stats_cache __read_mostly; @@ -168,10 +172,15 @@ int ovs_flow_tbl_init(struct flow_table *table) { struct table_instance *ti, *ufid_ti; - ti = table_instance_alloc(TBL_MIN_BUCKETS); + table->mask_cache = __alloc_percpu(sizeof(struct mask_cache_entry) * + MC_HASH_ENTRIES, + __alignof__(struct mask_cache_entry)); + if (!table->mask_cache) + return -ENOMEM; + ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) - return -ENOMEM; + goto free_mask_cache; ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ufid_ti) @@ -187,6 +196,8 @@ int ovs_flow_tbl_init(struct flow_table *table) free_ti: __table_instance_destroy(ti); +free_mask_cache: + free_percpu(table->mask_cache); return -ENOMEM; } @@ -243,6 +254,7 @@ void ovs_flow_tbl_destroy(struct flow_table *table) struct table_instance *ti = rcu_dereference_raw(table->ti); struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); + free_percpu(table->mask_cache); table_instance_destroy(ti, ufid_ti, false); } @@ -425,7 +437,8 @@ static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, static struct sw_flow *masked_flow_lookup(struct table_instance *ti, const struct sw_flow_key *unmasked, - const struct sw_flow_mask *mask) + const struct sw_flow_mask *mask, + u32 *n_mask_hit) { struct sw_flow *flow; struct hlist_head *head; @@ -435,6 +448,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ovs_flow_mask_key(&masked_key, unmasked, false, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); + (*n_mask_hit)++; + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) @@ -443,30 +458,97 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, return NULL; } -struct sw_flow *ovs_flow_tbl_lookup_stats(struct flow_table *tbl, - const struct sw_flow_key *key, - u32 *n_mask_hit) +static struct sw_flow *flow_lookup(struct flow_table *tbl, + struct table_instance *ti, + const struct sw_flow_key *key, + u32 *n_mask_hit) { - struct table_instance *ti = rcu_dereference_ovsl(tbl->ti); struct sw_flow_mask *mask; struct sw_flow *flow; - *n
[ovs-dev] [HELP] Question about userspace meter
Hi all: I have been testing OVS-DPDK Rate limit using meter table these days, but I have found the rate deviation is larger than kernel meter. My test result is as below: case 1. No Rate limit : Iperf between two VMs(on the same compute node) TCP bandwidth is above 1G case 2. Rate limit: 100Mbps, burst 80Mbps , Iperf bandwith is around 80Mbps case 3.Rate limit: 100Mbps, burst 100Mbps , Iperf bandwith is also around 80Mbps the meter setting of case 2 and 3 is as below: case2 : meter=1 kbps burst stats bands= type=drop rate=10 burst_size=8 case3 : meter=1 kbps burst stats bands= type=drop rate=10 burst_size=10 the dp->meters and meter bands parameter is as below: case 2: (gdb) p *$5 $6 = {flags = 13, n_bands = 1, max_delta_t = 800, used = 7071330038973, packet_count = 1150879, byte_count = 1652639486, bands = 0x12adf70} (gdb) p $5->bands[0] $7 = {up = {type = 1, prec_level = 0 '\000', rate = 10, burst_size = 8000}, bucket = 9712, packet_count = 125916, byte_count = 180814928} case 3: $3 = {flags = 13, n_bands = 1, max_delta_t = 1000, used = 7072663887432, packet_count = 6209035, byte_count = 8916116275, bands = 0x13c1990} (gdb) p $2->bands[0] $4 = {up = {type = 1, prec_level = 0 '\000', rate = 10, burst_size = 1}, bucket = 489312, packet_count = 728789, byte_count = 1046541004} I don't know the reason of the test deviation and how to set burst size ? Can anyone give me a simple explanation of the relationship between rate and burst value ? ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCHv2] netdev-afxdp: Enable loading XDP program.
Hi Eelco, Thanks for your review. On Tue, Oct 29, 2019 at 8:28 AM Eelco Chaudron wrote: > > See comments inline… > > On 5 Oct 2019, at 2:12, William Tu wrote: > > > Now netdev-afxdp always forwards all packets to userspace because > > it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'. > > There are some cases when users want to keep packets in kernel instead > > of sending to userspace, for example, management traffic such as SSH > > should be processed in kernel. > > > > The patch enables loading the user-provide XDP program by doing > > $ovs-vsctl -- set int afxdp-p0 options:xdpobj= > > Should we maybe change the name to xdp-obj? Yes, will fix it next version. > > > So users can implement their filtering logic or traffic steering idea > > in their XDP program, and rest of the traffic passes to AF_XDP socket > > handled by OVS. > > > > Signed-off-by: William Tu > > --- > > v2: > > A couple fixes and remove RFC > > --- > > Documentation/intro/install/afxdp.rst | 46 > > lib/netdev-afxdp.c| 137 > > ++ > > lib/netdev-linux-private.h| 2 + > > 3 files changed, 169 insertions(+), 16 deletions(-) > > > > diff --git a/Documentation/intro/install/afxdp.rst > > b/Documentation/intro/install/afxdp.rst > > index 820e9d993d8f..f484bdb8249b 100644 > > --- a/Documentation/intro/install/afxdp.rst > > +++ b/Documentation/intro/install/afxdp.rst > > @@ -265,6 +265,52 @@ Or, use OVS pmd tool:: > >ovs-appctl dpif-netdev/pmd-stats-show > > > > > > +Loading Custom XDP Program > > +-- > > +By defailt, netdev-afxdp always forwards all packets to userspace > > because > > +it is using libbpf's default XDP program. There are some cases when > > users > > +want to keep packets in kernel instead of sending to userspace, for > > example, > > +management traffic such as SSH should be processed in kernel. This > > can be > > +done by loading the user-provided XDP program:: > > + > > + ovs-vsctl -- set int afxdp-p0 options:xdpobj= > > + > > +So users can implement their filtering logic or traffic steering idea > > +in their XDP program, and rest of the traffic passes to AF_XDP socket > > +handled by OVS. Below is a sample C program compiled under kernel's > > +samples/bpf/. > > + > > +.. code-block:: c > > + > > + #include > > + #include "bpf_helpers.h" > > + > > + /* OVS will associate map 'xsks_map' to xsk socket. */ > > + struct bpf_map_def SEC("maps") xsks_map = { > > + .type = BPF_MAP_TYPE_XSKMAP, > > + .key_size = sizeof(int), > > + .value_size = sizeof(int), > > + .max_entries = 32, > > + }; > > + > > Maybe something like this could be added to make sure it compiles with > older kernels: > > #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0) > Good point, I will add it. > /* Kernel version before 5.3 needed an additional map */ > struct bpf_map_def SEC("maps") qidconf_map = { > .type = BPF_MAP_TYPE_ARRAY, > .key_size = sizeof(int), > .value_size = sizeof(int), > .max_entries = 64, > }; > #endi > > See also here: > https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c > > > + SEC("xdp_sock") > > + int xdp_sock_prog(struct xdp_md *ctx) > > + { > > + int index = ctx->rx_queue_index; > > + > > + /* Customized by user. > > + * For example > > + * 1) filter out all SSH traffic and return XDP_PASS > > + *for kernel to process. > > + * 2) Drop unwanted packet by returning XDP_DROP. > > + */ > > + > > + /* Rest of packets goes to AF_XDP. */ > > + return bpf_redirect_map(&xsks_map, index, 0); > > + } > > + char _license[] SEC("license") = "GPL"; > > + > > + > > Example Script > > -- > > > > diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c > > index 6e01803272aa..231d6473fb67 100644 > > --- a/lib/netdev-afxdp.c > > +++ b/lib/netdev-afxdp.c > > @@ -21,6 +21,7 @@ > > #include "netdev-afxdp.h" > > #include "netdev-afxdp-pool.h" > > > > +#include > > #include > > #include > > #include > > @@ -29,6 +30,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS); > > #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char > > *)base)) > > > > static struct xsk_socket_info *xsk_configure(int ifindex, int > > xdp_queue_id, > > - int mode); > > + int mode, bool > > use_default_xdp); > > static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode); > > static void xsk_destroy(struct xsk_socket_info *xsk); > > static int xsk_configure_all(struct netdev *netdev); > > @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux > > OVS_UNUSED) > > ovs_mutex_unlock(&unused_pools_mutex); > > } > > > > +static int > > +xsk_load
Re: [ovs-dev] [PATCH ovn 4/6] northd: Add lflows for IPv6 NAT.
Bleep bloop. Greetings Russell Bryant, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 82 characters long (recommended limit is 79) #86 FILE: northd/ovn-northd.c:6095: static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); WARNING: Line is 82 characters long (recommended limit is 79) #192 FILE: northd/ovn-northd.c:6980: static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); WARNING: Line is 83 characters long (recommended limit is 79) #247 FILE: northd/ovn-northd.c:7026: "inport == %s && arp.tpa == "IP_FMT" && arp.op == 1", WARNING: Line is 81 characters long (recommended limit is 79) #367 FILE: northd/ovn-northd.c:7176: && op->lrp_networks.ipv4_addrs[i].addr == snat_ips[j].ipv4) { WARNING: Line is 82 characters long (recommended limit is 79) #470 FILE: northd/ovn-northd.c:7420: static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); WARNING: Line is 81 characters long (recommended limit is 79) #476 FILE: northd/ovn-northd.c:7426: /* It was an invalid IPv4 address, but valid IPv6. Treat the rest WARNING: Line is 123 characters long (recommended limit is 79) #500 FILE: northd/ovn-northd.c:7450: if (error || (!is_v6 && mask != OVS_BE32_MAX) || (is_v6 && memcmp(&mask_v6, &v6_exact, sizeof(mask_v6 { Lines checked: 687, Warnings: 7, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added
Bleep bloop. Greetings Russell Bryant, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 81 characters long (recommended limit is 79) #154 FILE: utilities/ovn-nbctl.c:3876: ctl_error(ctx, "%s: Not a valid IPv4 or IPv6 address.", external_ip); Lines checked: 207, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn 2/6] actions: Add IPv6 support to lflow NAT actions
Bleep bloop. Greetings Russell Bryant, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 85 characters long (recommended limit is 79) #172 FILE: utilities/ovn-trace.c:1902: ds_put_format(&s, "(ip4.%s="IP_FMT")", direction, IP_ARGS(ct_nat->ipv4)); Lines checked: 183, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH ovn 5/6] system-ovn: Add IPv6 NAT test cases
These tests failed prior to the changes leading up to this one. Signed-off-by: Russell Bryant --- tests/system-ovn.at | 862 +++- 1 file changed, 860 insertions(+), 2 deletions(-) diff --git a/tests/system-ovn.at b/tests/system-ovn.at index f88ad31e4..b3f90aae2 100644 --- a/tests/system-ovn.at +++ b/tests/system-ovn.at @@ -176,6 +176,186 @@ OVS_TRAFFIC_VSWITCHD_STOP(["/failed to query port patch-.*/d /connection dropped.*/d"]) AT_CLEANUP +AT_SETUP([ovn -- 2 LRs connected via LS, gateway router, SNAT and DNAT - IPv6]) +AT_KEYWORDS([ovnnat]) + +CHECK_CONNTRACK() +CHECK_CONNTRACK_NAT() +ovn_start +OVS_TRAFFIC_VSWITCHD_START() +ADD_BR([br-int]) + +# Set external-ids in br-int needed for ovn-controller +ovs-vsctl \ +-- set Open_vSwitch . external-ids:system-id=hv1 \ +-- set Open_vSwitch . external-ids:ovn-remote=unix:$ovs_base/ovn-sb/ovn-sb.sock \ +-- set Open_vSwitch . external-ids:ovn-encap-type=geneve \ +-- set Open_vSwitch . external-ids:ovn-encap-ip=169.0.0.1 \ +-- set bridge br-int fail-mode=secure other-config:disable-in-band=true + +# Start ovn-controller +start_daemon ovn-controller + +# Logical network: +# Two LRs - R1 and R2 that are connected to each other via LS "join" +# in fd00::/64 network. R1 has switchess foo (fd11::/64) and +# bar (fd12::/64) connected to it. R2 has alice (fd21::/64) connected +# to it. R2 is a gateway router on which we add NAT rules. +# +#foo -- R1 -- join - R2 -- alice +# | +#bar + +ovn-nbctl create Logical_Router name=R1 +ovn-nbctl create Logical_Router name=R2 options:chassis=hv1 + +ovn-nbctl ls-add foo +ovn-nbctl ls-add bar +ovn-nbctl ls-add alice +ovn-nbctl ls-add join + +# Connect foo to R1 +ovn-nbctl lrp-add R1 foo 00:00:01:01:02:03 fd11::1/64 +ovn-nbctl lsp-add foo rp-foo -- set Logical_Switch_Port rp-foo \ +type=router options:router-port=foo addresses=\"00:00:01:01:02:03\" + +# Connect bar to R1 +ovn-nbctl lrp-add R1 bar 00:00:01:01:02:04 fd12::1/64 +ovn-nbctl lsp-add bar rp-bar -- set Logical_Switch_Port rp-bar \ +type=router options:router-port=bar addresses=\"00:00:01:01:02:04\" + +# Connect alice to R2 +ovn-nbctl lrp-add R2 alice 00:00:02:01:02:03 fd21::1/64 +ovn-nbctl lsp-add alice rp-alice -- set Logical_Switch_Port rp-alice \ +type=router options:router-port=alice addresses=\"00:00:02:01:02:03\" + +# Connect R1 to join +ovn-nbctl lrp-add R1 R1_join 00:00:04:01:02:03 fd00::1/64 +ovn-nbctl lsp-add join r1-join -- set Logical_Switch_Port r1-join \ +type=router options:router-port=R1_join addresses='"00:00:04:01:02:03"' + +# Connect R2 to join +ovn-nbctl lrp-add R2 R2_join 00:00:04:01:02:04 fd00::2/64 +ovn-nbctl lsp-add join r2-join -- set Logical_Switch_Port r2-join \ +type=router options:router-port=R2_join addresses='"00:00:04:01:02:04"' + +# Static routes. +ovn-nbctl lr-route-add R1 fd21::/64 fd00::2 +ovn-nbctl lr-route-add R2 fd11::/64 fd00::1 +ovn-nbctl lr-route-add R2 fd12::/64 fd00::1 + +# Logical port 'foo1' in switch 'foo'. +ADD_NAMESPACES(foo1) +ADD_VETH(foo1, foo1, br-int, "fd11::2/64", "f0:00:00:01:02:03", \ + "fd11::1") +OVS_WAIT_UNTIL([test "$(ip netns exec foo1 ip a | grep fd11::2 | grep tentative)" = ""]) +ovn-nbctl lsp-add foo foo1 \ +-- lsp-set-addresses foo1 "f0:00:00:01:02:03 fd11::2" + +# Logical port 'alice1' in switch 'alice'. +ADD_NAMESPACES(alice1) +ADD_VETH(alice1, alice1, br-int, "fd21::2/64", "f0:00:00:01:02:04", \ + "fd21::1") +OVS_WAIT_UNTIL([test "$(ip netns exec alice1 ip a | grep fd21::2 | grep tentative)" = ""]) +ovn-nbctl lsp-add alice alice1 \ +-- lsp-set-addresses alice1 "f0:00:00:01:02:04 fd21::2" + +# Logical port 'bar1' in switch 'bar'. +ADD_NAMESPACES(bar1) +ADD_VETH(bar1, bar1, br-int, "fd12::2/64", "f0:00:00:01:02:05", \ + "fd12::1") +OVS_WAIT_UNTIL([test "$(ip netns exec bar1 ip a | grep fd12::2 | grep tentative)" = ""]) +ovn-nbctl lsp-add bar bar1 \ +-- lsp-set-addresses bar1 "f0:00:00:01:02:05 fd12::2" + +# Add a DNAT rule. +ovn-nbctl -- --id=@nat create nat type="dnat" logical_ip=\"fd11::2\" \ +external_ip=\"fd30::2\" -- add logical_router R2 nat @nat + +# Add a SNAT rule +ovn-nbctl -- --id=@nat create nat type="snat" logical_ip=\"fd12::2\" \ +external_ip=\"fd30::1\" -- add logical_router R2 nat @nat + +# wait for ovn-controller to catch up. +ovn-nbctl --wait=hv sync +OVS_WAIT_UNTIL([ovs-ofctl dump-flows br-int | grep 'nat(src=fd30::1)']) + +# 'alice1' should be able to ping 'foo1' directly. +NS_CHECK_EXEC([alice1], [ping -6 -v -q -c 3 -i 0.3 -w 2 fd11::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# North-South DNAT: 'alice1' should also be able to ping 'foo1' via fd30::2 +NS_CHECK_EXEC([alice1], [ping -6 -q -c 3 -i 0.3 -w 2 fd30::2 | FORMAT_PING], \ +[0], [dnl +3 packets transmitted, 3 received, 0% packet loss, time 0ms +]) + +# Check conntrack entries. +AT_CHECK([ovs-appctl dpctl/dump-conntra
[ovs-dev] [PATCH ovn 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added
Signed-off-by: Russell Bryant --- tests/ovn-nbctl.at| 41 utilities/ovn-nbctl.c | 48 --- 2 files changed, 68 insertions(+), 21 deletions(-) diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at index 01091dd99..43a980bdf 100644 --- a/tests/ovn-nbctl.at +++ b/tests/ovn-nbctl.at @@ -407,16 +407,16 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 snatt 30.0.0.2 192.168.1.2], [1], [], [ovn-nbctl: snatt: type must be one of "dnat", "snat" and "dnat_and_snat". ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2a 192.168.1.2], [1], [], -[ovn-nbctl: 30.0.0.2a: should be an IPv4 address. +[ovn-nbctl: 30.0.0.2a: Not a valid IPv4 or IPv6 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0 192.168.1.2], [1], [], -[ovn-nbctl: 30.0.0: should be an IPv4 address. +[ovn-nbctl: 30.0.0: Not a valid IPv4 or IPv6 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2/24 192.168.1.2], [1], [], -[ovn-nbctl: 30.0.0.2/24: should be an IPv4 address. +[ovn-nbctl: 30.0.0.2/24: Not a valid IPv4 or IPv6 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2:80 192.168.1.2], [1], [], -[ovn-nbctl: 30.0.0.2:80: should be an IPv4 address. +[ovn-nbctl: 30.0.0.2:80: Not a valid IPv4 or IPv6 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2 192.168.1.2a], [1], [], [ovn-nbctl: 192.168.1.2a: should be an IPv4 address or network. @@ -431,19 +431,19 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.2 192.168.1.2/a], [1], [], [ovn-nbctl: 192.168.1.2/a: should be an IPv4 address or network. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2a], [1], [], -[ovn-nbctl: 192.168.1.2a: should be an IPv4 address. +[ovn-nbctl: 192.168.1.2a: Not a valid IPv4 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1], [1], [], -[ovn-nbctl: 192.168.1: should be an IPv4 address. +[ovn-nbctl: 192.168.1: Not a valid IPv4 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2:80], [1], [], -[ovn-nbctl: 192.168.1.2:80: should be an IPv4 address. +[ovn-nbctl: 192.168.1.2:80: Not a valid IPv4 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.2 192.168.1.2/24], [1], [], -[ovn-nbctl: 192.168.1.2/24: should be an IPv4 address. +[ovn-nbctl: 192.168.1.2/24: Not a valid IPv4 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.2/24], [1], [], -[ovn-nbctl: 192.168.1.2/24: should be an IPv4 address. +[ovn-nbctl: 192.168.1.2/24: Not a valid IPv4 address. ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.2 lp0], [1], [], [ovn-nbctl: lr-nat-add with logical_port must also specify external_mac. @@ -465,15 +465,23 @@ AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.2 lp0 00:00: dnl Add snat and dnat AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.1 192.168.1.0/24]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat fd01::1 fd11::/64]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 30.0.0.1 192.168.1.2]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat fd01::1 fd11::2]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.1 192.168.1.2]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat fd01::1 fd11::2]) AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.3 lp0 00:00:00:01:02:03]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat_and_snat fd01::2 fd11::3 lp0 00:00:00:01:02:03]) AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC LOGICAL_PORT dnat 30.0.0.1 192.168.1.2 +dnat fd01::1fd11::2 dnat_and_snat30.0.0.1 192.168.1.2 dnat_and_snat30.0.0.2 192.168.1.3 00:00:00:01:02:03 lp0 +dnat_and_snatfd01::1fd11::2 +dnat_and_snatfd01::2fd11::3 00:00:00:01:02:03 lp0 snat 30.0.0.1 192.168.1.0/24 +snat fd01::1fd11::/64 ]) AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 30.0.0.1 192.168.1.0/24], [1], [], [ovn-nbctl: 30.0.0.1, 192.168.1.0/24: a NAT with this external_ip and logical_ip already exists @@ -503,17 +511,26 @@ AT_CHECK([ovn-nbctl --may-exist lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1. AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC LOGICAL_PORT dnat 30.0.0.1 192.168.1.2 +dnat fd01::1fd11::2 dnat_and_snat30.0.0.1 192.168.1.2 dnat_and_snat30.0.0.2 192.168.1.3 00:00:00:04:05:06 lp0 +dnat_and_snatfd01::1fd11::2 +dnat_and_snatfd01::2fd11::3 00:00:00:01:02:03 lp0 snat 30.0.0.1 192.168.1.0/24 +snat fd01::1fd11::/64 ]) AT_CHECK([ovn-nbctl --may-exist lr-nat-add lr0 dnat_and_snat 30.0.0.2 192.168.1.3]) +AT_CHECK([ovn-nbct
[ovs-dev] [PATCH ovn 6/6] NEWS: Add IPv6 NAT support
Signed-off-by: Russell Bryant --- NEWS | 6 ++ 1 file changed, 6 insertions(+) diff --git a/NEWS b/NEWS index 73045d65f..ab2f13318 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,9 @@ +Post-OVS-v2.12.0 +- + - OVN was split out from the OVS repository and is now released + independently. + - Added IPv6 NAT support for OVN routers. + Post-v2.11.0 - - DPDK: -- 2.21.0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH ovn 4/6] northd: Add lflows for IPv6 NAT.
Signed-off-by: Russell Bryant --- northd/ovn-northd.c | 376 1 file changed, 278 insertions(+), 98 deletions(-) diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index ae81a6944..a10017ba1 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -66,6 +66,15 @@ struct northd_context { struct ovsdb_idl_index *sbrec_ip_mcast_by_dp; }; +/* An IPv4 or IPv6 address */ +struct v46_ip { +int family; +union { +ovs_be32 ipv4; +struct in6_addr ipv6; +}; +}; + static const char *ovnnb_db; static const char *ovnsb_db; static const char *unixctl_path; @@ -2273,6 +2282,15 @@ get_nat_addresses(const struct ovn_port *op, size_t *n) break; } } +if (!is_router_ip) { +for (size_t j = 0; j < op->lrp_networks.n_ipv6_addrs; j++) { +if (!strcmp(nat->external_ip, +op->lrp_networks.ipv6_addrs[j].addr_s)) { +is_router_ip = true; +break; +} +} +} if (!is_router_ip) { ds_put_format(&c_addresses, " %s", nat->external_ip); @@ -6031,9 +6049,28 @@ add_distributed_nat_routes(struct hmap *lflows, const struct ovn_port *op) continue; } +/* Determine if we need to create IPv4 or IPv6 flows */ +ovs_be32 ip; +struct in6_addr ipv6; +int family = AF_INET; +if (!ip_parse(nat->external_ip, &ip) || !ip) { +family = AF_INET6; +if (!ipv6_parse(nat->external_ip, &ipv6)) { +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); +VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration " + "for router %s", nat->external_ip, op->key); +/* We'll create IPv6 flows anyway, but the address + * is probably bogus ... */ +} +} + ds_put_format(&match, "inport == %s && " - "ip4.src == %s && ip4.dst == %s", - op->json_key, nat->logical_ip, nat->external_ip); + "ip%s.src == %s && ip%s.dst == %s", + op->json_key, + family == AF_INET ? "4" : "6", + nat->logical_ip, + family == AF_INET ? "4" : "6", + nat->external_ip); ds_put_format(&actions, "outport = %s; eth.dst = %s; " REGBIT_DISTRIBUTED_NAT" = 1; " REGBIT_NAT_REDIRECT" = 0; next;", @@ -6051,17 +6088,37 @@ add_distributed_nat_routes(struct hmap *lflows, const struct ovn_port *op) !nat2->external_mac || !nat2->external_ip) continue; +family = AF_INET; +if (!ip_parse(nat2->external_ip, &ip) || !ip) { +family = AF_INET6; +if (!ipv6_parse(nat2->external_ip, &ipv6)) { +static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(5, 1); +VLOG_WARN_RL(&rl, "bad ip address %s in nat configuration " + "for router %s", nat2->external_ip, op->key); +/* We'll create IPv6 flows anyway, but the address + * is probably bogus ... */ +} +} + ds_put_format(&match, "inport == %s && " - "ip4.src == %s && ip4.dst == %s", - op->json_key, nat->logical_ip, nat2->external_ip); + "ip%s.src == %s && ip%s.dst == %s", + op->json_key, + family == AF_INET ? "4" : "6", + nat->logical_ip, + family == AF_INET ? "4" : "6", + nat2->external_ip); ds_put_format(&actions, "outport = %s; " "eth.src = %s; eth.dst = %s; " - "reg0 = ip4.dst; reg1 = %s; " + "%sreg0 = ip%s.dst; %sreg1 = %s; " REGBIT_DISTRIBUTED_NAT" = 1; " REGBIT_NAT_REDIRECT" = 0; next;", op->od->l3dgw_port->json_key, op->od->l3dgw_port->lrp_networks.ea_s, - nat2->external_mac, nat->external_ip); + nat2->external_mac, + family == AF_INET ? "" : "xx", + family == AF_INET ? "4" : "6", + family == AF_INET ? "" : "xx", + nat->external_ip); ovn_lflow_add(lflows, op->od, S_ROUTER_IN_IP_ROUTING, 400, ds_cstr(&match), ds_cstr(&actions)); ds_clear(&match); @@ -6298,7 +6355,8 @@ op_put
[ovs-dev] [PATCH ovn 1/6] northd: Fix table ID for IPv6 router ingress.
I noticed that this table number was outdated. This is now table 3. There are a few other sections of code for this table that were all correctly referencing table 3. Signed-off-by: Russell Bryant --- northd/ovn-northd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 194e4bf4a..ae81a6944 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -7064,7 +7064,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, free(snat_ips); } -/* Logical router ingress table 1: IP Input for IPv6. */ +/* Logical router ingress table 3: IP Input for IPv6. */ HMAP_FOR_EACH (op, key_node, ports) { if (!op->nbrp) { continue; -- 2.21.0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH ovn 2/6] actions: Add IPv6 support to lflow NAT actions
Signed-off-by: Russell Bryant --- include/ovn/actions.h | 6 +- lib/actions.c | 35 +++ tests/ovn.at | 18 -- utilities/ovn-trace.c | 14 +- 4 files changed, 53 insertions(+), 20 deletions(-) diff --git a/include/ovn/actions.h b/include/ovn/actions.h index 4e2f4d28d..f4997e9c9 100644 --- a/include/ovn/actions.h +++ b/include/ovn/actions.h @@ -225,7 +225,11 @@ struct ovnact_ct_commit { /* OVNACT_CT_DNAT, OVNACT_CT_SNAT. */ struct ovnact_ct_nat { struct ovnact ovnact; -ovs_be32 ip; +int family; +union { +struct in6_addr ipv6; +ovs_be32 ipv4; +}; uint8_t ltable; /* Logical table ID of next table. */ }; diff --git a/lib/actions.c b/lib/actions.c index c8c9cc5fd..a999a4fda 100644 --- a/lib/actions.c +++ b/lib/actions.c @@ -755,11 +755,18 @@ parse_ct_nat(struct action_context *ctx, const char *name, if (lexer_match(ctx->lexer, LEX_T_LPAREN)) { if (ctx->lexer->token.type != LEX_T_INTEGER -|| ctx->lexer->token.format != LEX_F_IPV4) { -lexer_syntax_error(ctx->lexer, "expecting IPv4 address"); +|| (ctx->lexer->token.format != LEX_F_IPV4 +&& ctx->lexer->token.format != LEX_F_IPV6)) { +lexer_syntax_error(ctx->lexer, "expecting IPv4 or IPv6 address"); return; } -cn->ip = ctx->lexer->token.value.ipv4; +if (ctx->lexer->token.format == LEX_F_IPV4) { +cn->family = AF_INET; +cn->ipv4 = ctx->lexer->token.value.ipv4; +} else if (ctx->lexer->token.format == LEX_F_IPV6) { +cn->family = AF_INET6; +cn->ipv6 = ctx->lexer->token.value.ipv6; +} lexer_get(ctx->lexer); if (!lexer_force_match(ctx->lexer, LEX_T_RPAREN)) { @@ -784,8 +791,12 @@ static void format_ct_nat(const struct ovnact_ct_nat *cn, const char *name, struct ds *s) { ds_put_cstr(s, name); -if (cn->ip) { -ds_put_format(s, "("IP_FMT")", IP_ARGS(cn->ip)); +if (cn->family == AF_INET) { +ds_put_format(s, "("IP_FMT")", IP_ARGS(cn->ipv4)); +} else if (cn->family == AF_INET6) { +ds_put_char(s, '('); +ipv6_format_addr(&cn->ipv6, s); +ds_put_char(s, ')'); } ds_put_char(s, ';'); } @@ -831,9 +842,17 @@ encode_ct_nat(const struct ovnact_ct_nat *cn, nat->flags = 0; nat->range_af = AF_UNSPEC; -if (cn->ip) { +if (cn->family == AF_INET) { nat->range_af = AF_INET; -nat->range.addr.ipv4.min = cn->ip; +nat->range.addr.ipv4.min = cn->ipv4; +if (snat) { +nat->flags |= NX_NAT_F_SRC; +} else { +nat->flags |= NX_NAT_F_DST; +} +} else if (cn->family == AF_INET6) { +nat->range_af = AF_INET6; +nat->range.addr.ipv6.min = cn->ipv6; if (snat) { nat->flags |= NX_NAT_F_SRC; } else { @@ -843,7 +862,7 @@ encode_ct_nat(const struct ovnact_ct_nat *cn, ofpacts->header = ofpbuf_push_uninit(ofpacts, nat_offset); ct = ofpacts->header; -if (cn->ip) { +if (cn->family == AF_INET || cn->family == AF_INET6) { ct->flags |= NX_CT_F_COMMIT; } ofpact_finish(ofpacts, &ct->ofpact); diff --git a/tests/ovn.at b/tests/ovn.at index 9f06059fa..d78689d86 100644 --- a/tests/ovn.at +++ b/tests/ovn.at @@ -1043,15 +1043,18 @@ ct_dnat; ct_dnat(192.168.1.2); encodes as ct(commit,table=19,zone=NXM_NX_REG11[0..15],nat(dst=192.168.1.2)) has prereqs ip +ct_dnat(fd11::2); +encodes as ct(commit,table=19,zone=NXM_NX_REG11[0..15],nat(dst=fd11::2)) +has prereqs ip ct_dnat(192.168.1.2, 192.168.1.3); Syntax error at `,' expecting `)'. ct_dnat(foo); -Syntax error at `foo' expecting IPv4 address. +Syntax error at `foo' expecting IPv4 or IPv6 address. ct_dnat(foo, bar); -Syntax error at `foo' expecting IPv4 address. +Syntax error at `foo' expecting IPv4 or IPv6 address. ct_dnat(); -Syntax error at `)' expecting IPv4 address. +Syntax error at `)' expecting IPv4 or IPv6 address. # ct_snat ct_snat; @@ -1060,15 +1063,18 @@ ct_snat; ct_snat(192.168.1.2); encodes as ct(commit,table=19,zone=NXM_NX_REG12[0..15],nat(src=192.168.1.2)) has prereqs ip +ct_snat(fd11::2); +encodes as ct(commit,table=19,zone=NXM_NX_REG12[0..15],nat(src=fd11::2)) +has prereqs ip ct_snat(192.168.1.2, 192.168.1.3); Syntax error at `,' expecting `)'. ct_snat(foo); -Syntax error at `foo' expecting IPv4 address. +Syntax error at `foo' expecting IPv4 or IPv6 address. ct_snat(foo, bar); -Syntax error at `foo' expecting IPv4 address. +Syntax error at `foo' expecting IPv4 or IPv6 address. ct_snat(); -Syntax error at `)' expecting IPv4 address. +Syntax error at `)' expecting IPv4 or IPv6 address. # ct_clear ct_clear; diff --git a/utilities/ovn-trace.c b/utilities/ovn-trace.c index c95ac
[ovs-dev] [PATCH ovn 0/6] Add IPv6 NAT support
This came up with reviewing the usage of ovn-kubernetes with IPv6. It's more straight forward to start with using IPv6 NAT, matching the IPv4 network topology with Kubernetes. Eventually, we'd want to allow routable IPv6 addresses everywhere, but this gives us the option of using NAT where it's helpful. [PATCH 1/6] northd: Fix table ID for IPv6 router ingress. [PATCH 2/6] actions: Add IPv6 support to lflow NAT actions [PATCH 3/6] ovn-nbctl: Allow IPv6 NAT rules to be added [PATCH 4/6] northd: Add lflows for IPv6 NAT. [PATCH 5/6] system-ovn: Add IPv6 NAT test cases [PATCH 6/6] NEWS: Add IPv6 NAT support NEWS |6 include/ovn/actions.h |6 lib/actions.c | 35 +- northd/ovn-northd.c | 378 - tests/ovn-nbctl.at| 41 +- tests/ovn.at | 18 - tests/system-ovn.at | 862 +- utilities/ovn-nbctl.c | 48 ++ utilities/ovn-trace.c | 14 9 files changed, 1266 insertions(+), 142 deletions(-) -- Russell Bryant ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn 18/19] ovn-ctl: Support commands for interconnection.
Thanks Han for the patches. OVN-ctl doesn't seem to accept inb/sb args. On Sun, Oct 20, 2019 at 5:55 PM Han Zhou wrote: > Add support for managing IC-NB and IC-SB DBs, and ovn-ic daemon. > > Signed-off-by: Han Zhou > --- > utilities/ovn-ctl | 362 > +++- > utilities/ovn-ctl.8.xml | 91 > 2 files changed, 452 insertions(+), 1 deletion(-) > > diff --git a/utilities/ovn-ctl b/utilities/ovn-ctl > index 2e4e773..f8d23a3 100755 > --- a/utilities/ovn-ctl > +++ b/utilities/ovn-ctl > @@ -33,6 +33,9 @@ done > ovnnb_active_conf_file="$ovn_etcdir/ovnnb-active.conf" > ovnsb_active_conf_file="$ovn_etcdir/ovnsb-active.conf" > ovn_northd_db_conf_file="$ovn_etcdir/ovn-northd-db-params.conf" > +ovninb_active_conf_file="$ovn_etcdir/ovninb-active.conf" > +ovnisb_active_conf_file="$ovn_etcdir/ovnisb-active.conf" > +ovn_ic_db_conf_file="$ovn_etcdir/ovn-ic-db-params.conf" > ## - ## > ## start ## > ## - ## > @@ -61,6 +64,19 @@ stop_ovsdb () { > stop_sb_ovsdb > } > > +stop_inb_ovsdb() { > +stop_xx_ovsdb $DB_INB_PID ovninb_db.ctl > +} > + > +stop_isb_ovsdb() { > +stop_xx_ovsdb $DB_ISB_PID ovnisb_db.ctl > +} > + > +stop_ic_ovsdb () { > +stop_inb_ovsdb > +stop_isb_ovsdb > +} > + > demote_xx_ovsdb () { > local sync_from_addr=$1 > local sync_from_proto=$2 > @@ -91,6 +107,16 @@ demote_ovnsb() { > $DB_SB_SYNC_FROM_PORT $ovnsb_active_conf_file > ovnsb_db.ctl > } > > +demote_ovninb() { > +demote_xx_ovsdb $DB_INB_SYNC_FROM_ADDR $DB_INB_SYNC_FROM_PROTO \ > +$DB_INB_SYNC_FROM_PORT $ovninb_active_conf_file > ovninb_db.ctl > +} > + > +demote_ovnisb() { > +demote_xx_ovsdb $DB_ISB_SYNC_FROM_ADDR $DB_ISB_SYNC_FROM_PROTO \ > +$DB_ISB_SYNC_FROM_PORT $ovnisb_active_conf_file > ovnisb_db.ctl > +} > + > promote_xx_ovsdb() { > local active_conf_file=$1 > local ctl_file=$2 > @@ -106,6 +132,14 @@ promote_ovnsb() { > promote_xx_ovsdb $ovnsb_active_conf_file ovnsb_db.ctl > } > > +promote_ovninb() { > +promote_xx_ovsdb $ovninb_active_conf_file ovninb_db.ctl > +} > + > +promote_ovnisb() { > +promote_xx_ovsdb $ovnisb_active_conf_file ovnisb_db.ctl > +} > + > start_ovsdb__() { > local DB=$1 db=$2 schema_name=$3 table_name=$4 > local db_pid_file > @@ -284,6 +318,19 @@ start_ovsdb () { > start_sb_ovsdb > } > > +start_inb_ovsdb() { > +start_ovsdb__ INB inb OVN_IC_Northbound INB_Global > +} > + > +start_isb_ovsdb() { > +start_ovsdb__ ISB isb OVN_IC_Southbound ISB_Global > +} > + > +start_ic_ovsdb () { > +start_inb_ovsdb > +start_isb_ovsdb > +} > + > sync_status() { > ovn-appctl -t $OVN_RUNDIR/ovn${1}_db.ctl ovsdb-server/sync-status | > awk '{if(NR==1) print $2}' > } > @@ -318,6 +365,36 @@ status_ovsdb () { >fi > } > > +status_ovninb() { > +if ! pidfile_is_running $DB_INB_PID; then > +echo "not-running" > +else > +echo "running/$(sync_status inb)" > +fi > +} > + > +status_ovnisb() { > +if ! pidfile_is_running $DB_ISB_PID; then > +echo "not-running" > +else > +echo "running/$(sync_status isb)" > +fi > +} > + > +status_ic_ovsdb () { > + if ! pidfile_is_running $DB_INB_PID; then > + log_success_msg "OVN IC-Northbound DB is not running" > + else > + log_success_msg "OVN IC-Northbound DB is running" > + fi > + > + if ! pidfile_is_running $DB_ISB_PID; then > + log_success_msg "OVN IC-Southbound DB is not running" > + else > + log_success_msg "OVN IC-Southbound DB is running" > + fi > +} > + > run_nb_ovsdb() { > DB_NB_DETACH=no > start_nb_ovsdb > @@ -328,6 +405,16 @@ run_sb_ovsdb() { > start_sb_ovsdb > } > > +run_inb_ovsdb() { > +DB_INB_DETACH=no > +start_inb_ovsdb > +} > + > +run_isb_ovsdb() { > +DB_ISB_DETACH=no > +start_isb_ovsdb > +} > + > start_northd () { > if [ ! -e $ovn_northd_db_conf_file ]; then > if test X"$OVN_MANAGE_OVSDB" = Xyes; then > @@ -373,6 +460,41 @@ start_northd () { > fi > } > > +start_ic () { > +if [ ! -e $ovn_ic_db_conf_file ]; then > +ovn_ic_params="--ovnnb-db=$OVN_NORTHD_NB_DB \ > + --ovnsb-db=$OVN_NORTHD_SB_DB \ > + --ovninb-db=$OVN_IC_NB_DB \ > + --ovnisb-db=$OVN_IC_SB_DB" > +else > +ovn_ic_params="`cat $ovn_ic_db_conf_file`" > +fi > + > +if daemon_is_running ovn-ic; then > +log_success_msg "ovn-ic is already running" > +else > +set ovn-ic > +if test X"$OVN_IC_LOGFILE" != X; then > +set "$@" --log-file=$OVN_IC_LOGFILE > +fi > +if test X"$OVN_IC_SSL_KEY" != X; then > +set "$@" --private-key=$OVN_IC_SSL_KEY > +fi > +if test X"$OVN_IC_SSL_CERT" != X; then > +set "$@" --certificate=$OVN_IC_SSL_CERT > +fi > +if test X"$OVN_IC_SSL_CA_CERT" != X; then > +
Re: [ovs-dev] [PATCH ovn 10/19] ovn-ic: Interconnection controller with AZ registeration.
Thanks Han for the patches. On Sun, Oct 20, 2019 at 5:54 PM Han Zhou wrote: > This patch introduces interconnection controller, ovn-ic, and > implements the basic AZ registration feature: taking the AZ > name from NB DB and create an Availability_Zone entry in > IC-SB DB. > > Signed-off-by: Han Zhou > --- > Makefile.am | 1 + > ic/.gitignore| 2 + > ic/automake.mk | 10 ++ > ic/ovn-ic.8.xml | 111 ++ > ic/ovn-ic.c | 413 > +++ > ovn-nb.ovsschema | 5 +- > ovn-nb.xml | 7 + > tests/automake.mk| 4 +- > tests/ovn-ic.at | 29 > tests/ovn-macros.at | 161 > tests/testsuite.at | 1 + > tutorial/ovs-sandbox | 78 +- > 12 files changed, 792 insertions(+), 30 deletions(-) > create mode 100644 ic/.gitignore > create mode 100644 ic/automake.mk > create mode 100644 ic/ovn-ic.8.xml > create mode 100644 ic/ovn-ic.c > create mode 100644 tests/ovn-ic.at > > diff --git a/Makefile.am b/Makefile.am > index 33c18c5..d22a220 100644 > --- a/Makefile.am > +++ b/Makefile.am > @@ -500,4 +500,5 @@ include selinux/automake.mk > include controller/automake.mk > include controller-vtep/automake.mk > include northd/automake.mk > +include ic/automake.mk > include build-aux/automake.mk > diff --git a/ic/.gitignore b/ic/.gitignore > new file mode 100644 > index 000..1b73eb4 > --- /dev/null > +++ b/ic/.gitignore > @@ -0,0 +1,2 @@ > +/ovn-ic > +/ovn-ic.8 > diff --git a/ic/automake.mk b/ic/automake.mk > new file mode 100644 > index 000..8e71bc3 > --- /dev/null > +++ b/ic/automake.mk > @@ -0,0 +1,10 @@ > +# ovn-ic > +bin_PROGRAMS += ic/ovn-ic > +ic_ovn_ic_SOURCES = ic/ovn-ic.c > +ic_ovn_ic_LDADD = \ > + lib/libovn.la \ > + $(OVSDB_LIBDIR)/libovsdb.la \ > + $(OVS_LIBDIR)/libopenvswitch.la > +man_MANS += ic/ovn-ic.8 > +EXTRA_DIST += ic/ovn-ic.8.xml > +CLEANFILES += ic/ovn-ic.8 > diff --git a/ic/ovn-ic.8.xml b/ic/ovn-ic.8.xml > new file mode 100644 > index 000..00f33aa > --- /dev/null > +++ b/ic/ovn-ic.8.xml > @@ -0,0 +1,111 @@ > + > + > +Name > +ovn-ic -- Open Virtual Network interconnection controller > + > +Synopsis > +ovn-ic [options] > + > +Description > + > + ovn-ic, OVN interconnection controller, is a > centralized > + daemon which communicates with global interconnection databases > INB/ISB > + to configure and exchange data with local NB/SB for interconnecting > + with other OVN deployments. > + > + > +Options > + > + --ovnnb-db=database > + > +The OVSDB database containing the OVN Northbound Database. If the > +OVN_NB_DB environment variable is set, its value is > used > +as the default. Otherwise, the default is > +unix:@RUNDIR@/ovnnb_db.sock. > + > + --ovnsb-db=database > + > +The OVSDB database containing the OVN Southbound Database. If the > +OVN_SB_DB environment variable is set, its value is > used > +as the default. Otherwise, the default is > +unix:@RUNDIR@/ovnsb_db.sock. > + > + --ovninb-db=database > + > +The OVSDB database containing the OVN Interconnection Northbound > +Database. If the OVN_INB_DB environment variable is > set, > +its value is used as the default. Otherwise, the default is > +unix:@RUNDIR@/ovninb_db.sock. > + > + --ovnisb-db=database > + > +The OVSDB database containing the OVN Interconnection Southbound > +Database. If the OVN_ISB_DB environment variable is > set, > +its value is used as the default. Otherwise, the default is > +unix:@RUNDIR@/ovnisb_db.sock. > + > + > + > + database in the above options must be an OVSDB active or > + passive connection method, as described in ovsdb(7). > + > + > +Daemon Options > +http://www.w3.org/2003/XInclude"/> > + > +Logging Options > +http://www.w3.org/2003/XInclude"/> > + > +PKI Options > + > + PKI configuration is required in order to use SSL for the > connections to > + the Northbound and Southbound databases. > + > +http://www.w3.org/2003/XInclude"/> > + > +Other Options > + + xmlns:xi="http://www.w3.org/2003/XInclude"/> > + > + + xmlns:xi="http://www.w3.org/2003/XInclude"/> > + > +Runtime Management Commands > + > + ovs-appctl can send commands to a running > + ovn-ic process. The currently supported commands > + are described below. > + > + exit > + > +Causes ovn-ic to gracefully terminate. > + > + > + pause > + > +Pauses the ovn-ic operation from processing any Northbound and > +Southbound database changes. > + > + > + resume > + > +Resumes the ovn-ic operation to process Northbound and > +So
Re: [ovs-dev] [PATCH ovn 19/19] tutorial: Add tutorial for OVN Interconnection.
Thanks Han for the correction. Just one more minor typo in the tutorial below. I hit some roadblocks to start ic controller on different az but got my setup running with workarounds in current code and have posted in comments which needs fix for sure. I tried with 2 different ovn setups with 2 AZs where each ovn az uses1 hv, 1 gw and 1 lport bound to hv. I am able to access the workloads across azs. Tested-by: Aliasgar Ginwala > On Sun, Oct 20, 2019 at 5:55 PM Han Zhou wrote: > Added tutorial, and also updated NEWS and TODO. > > Signed-off-by: Han Zhou > --- > Documentation/automake.mk | 1 + > Documentation/tutorials/index.rst | 1 + > Documentation/tutorials/ovn-interconnection.rst | 181 > > NEWS| 5 + > TODO.rst| 10 ++ > 5 files changed, 198 insertions(+) > create mode 100644 Documentation/tutorials/ovn-interconnection.rst > > diff --git a/Documentation/automake.mk b/Documentation/automake.mk > index 5968d69..15d261d 100644 > --- a/Documentation/automake.mk > +++ b/Documentation/automake.mk > @@ -20,6 +20,7 @@ DOC_SOURCE = \ > Documentation/tutorials/ovn-sandbox.rst \ > Documentation/tutorials/ovn-ipsec.rst \ > Documentation/tutorials/ovn-rbac.rst \ > + Documentation/tutorials/ovn-interconnection.rst \ > Documentation/topics/index.rst \ > Documentation/topics/testing.rst \ > Documentation/topics/high-availability.rst \ > diff --git a/Documentation/tutorials/index.rst > b/Documentation/tutorials/index.rst > index 1cf083e..4ff6e16 100644 > --- a/Documentation/tutorials/index.rst > +++ b/Documentation/tutorials/index.rst > @@ -43,3 +43,4 @@ vSwitch. > ovn-openstack > ovn-rbac > ovn-ipsec > + ovn-interconnection > diff --git a/Documentation/tutorials/ovn-interconnection.rst > b/Documentation/tutorials/ovn-interconnection.rst > new file mode 100644 > index 000..1320d41 > --- /dev/null > +++ b/Documentation/tutorials/ovn-interconnection.rst > @@ -0,0 +1,181 @@ > +.. > + Licensed under the Apache License, Version 2.0 (the "License"); you > may > + not use this file except in compliance with the License. You may > obtain > + a copy of the License at > + > + http://www.apache.org/licenses/LICENSE-2.0 > + > + Unless required by applicable law or agreed to in writing, software > + distributed under the License is distributed on an "AS IS" BASIS, > WITHOUT > + WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. > See the > + License for the specific language governing permissions and > limitations > + under the License. > + > + Convention for heading levels in OVN documentation: > + > + === Heading 0 (reserved for the title in a document) > + --- Heading 1 > + ~~~ Heading 2 > + +++ Heading 3 > + ''' Heading 4 > + > + Avoid deeper levels because they do not render well. > + > +=== > +OVN Interconnection > +=== > + > +This document provides a guide for interconnecting multiple OVN > deployements > +with OVN managed tunneling. More details about the OVN Interconnectiong > design > +can be found in ``ovn-architecture``\(7) manpage. > + > +This document assumes two or more OVN deployments are setup and runs > normally, > +possibly at different data-centers, and the gateway chassises of each OVN > +are with IP addresses that are reachable between each other. > + > +Setup Interconnection Databases > +--- > + > +To interconnect different OVNs, you need to create global OVSDB databases > that > +store interconnection data. The databases can be setup on any nodes that > are > +accessible from all the central nodes of each OVN deployment. It is > +recommended that the global databases are setup with HA, with nodes in > +different avaialbility zones, to avoid single point of failure. > + > +1. Install OVN packages on each global database node. > + > +2. Start OVN IC-NB and IC-SB databases. > + > + On each global database node :: > + > +$ ovn-ctl [options] start_ic_ovsdb > + > + Options depends on the HA mode you use. See details with :: > + > +$ ovn-ctl --help. > + > +Register OVN to Interconnection Databases > +- > + > +For each OVN deployment, set an availability zone name :: > + > +$ ovn-nbctl set NB_Global . name= > + > +The name should be unique across all OVN deployments, e.g. ovn-east, > +ovn-west, etc. > + > +For each OVN deployment, start the ``ovn-ic`` daemon on central nodes :: > + > +$ ovn-ctl --ovninb-db= --ovnisb-db= \ > + --ovnnb-db= --ovnsb-db= [more options] start_ic > + > +An example of is ``tcp::6645``, or for > +clustered DB: ``tcp::6645,tcp::6645,tcp::6645``. > + is similar, but usually with a different port nu
Re: [ovs-dev] [PATCH v3 1/2 ovn] OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless
Bleep bloop. Greetings Ankur Sharma, I am a robot and I have tried out your patch. Thanks for your contribution. I encountered some error that I wasn't expecting. See the details below. checkpatch: WARNING: Line is 220 characters long (recommended limit is 79) #121 FILE: utilities/ovn-nbctl.8.xml:668: [--may-exist] [--stateless]lr-nat-add router type external_ip logical_ip [logical_port external_mac] Lines checked: 220, Warnings: 1, Errors: 0 Please check this out. If you feel there has been an error, please email acon...@redhat.com Thanks, 0-day Robot ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] New PO
Good Day, Please do check our new order list and send quote us. Get back to us with best price confirmation. Thanks Regards, Sui Purchase Manager,___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v3 2/2 ovn] OVN: Use ip4.src and ip4.dst actions for NAT rules
For dnat_and_snat rules which are meant to be stateless instead of using ct_snat/dnat OVN actions, we will use ip4.src/ip4.dst. This actions will do 1:1 mapping to inner ip to external ip, while recalculating the checksums. Signed-off-by: Ankur Sharma --- northd/ovn-northd.8.xml | 34 -- northd/ovn-northd.c | 86 +++-- tests/ovn-northd.at | 57 + tests/ovn.at| 311 4 files changed, 472 insertions(+), 16 deletions(-) diff --git a/northd/ovn-northd.8.xml b/northd/ovn-northd.8.xml index d3e0e5e..87a6909 100644 --- a/northd/ovn-northd.8.xml +++ b/northd/ovn-northd.8.xml @@ -1807,7 +1807,9 @@ icmp6 { to change the source IP address of a packet from A to B, a priority-90 flow matches ip && ip4.dst == B with an action - ct_snat; . + ct_snat; . If the NAT rule is of type dnat_and_snat + and has stateless=true in the options, then the action + would be ip4.dst=(B). @@ -1827,7 +1829,10 @@ icmp6 { B, a priority-100 flow matches ip && ip4.dst == B && inport == GW, where GW is the logical router gateway port, with an - action ct_snat;. + action ct_snat;. If the NAT rule is of type + dnat_and_snat and has stateless=true in the + options, then the action would be ip4.dst= + (B). @@ -1947,7 +1952,10 @@ icmp6 { Gateway router is configured to force SNAT any DNATed packet, the above action will be replaced by flags.force_snat_for_dnat = 1; flags.loopback = 1; -ct_dnat(B);. +ct_dnat(B);. If the NAT rule is of type +dnat_and_snat and has stateless=true in the +options, then the action would be ip4.dst= +(B). @@ -1979,7 +1987,10 @@ icmp6 { B, a priority-100 flow matches ip && ip4.dst == B && inport == GW, where GW is the logical router gateway port, with an - action ct_dnat(B);. + action ct_dnat(B);. If the NAT rule is of + type dnat_and_snat and has stateless=true in the + options, then the action would be ip4.dst= + (B). @@ -2653,7 +2664,10 @@ nd_ns { matches ip && ip4.src == B && outport == GW, where GW is the logical router gateway port, with an action - ct_dnat;. + ct_dnat;. If the NAT rule is of type + dnat_and_snat and has stateless=true in the + options, then the action would be ip4.src= + (B). @@ -2711,7 +2725,10 @@ nd_ns { ip && ip4.src == A with an action ct_snat(B);. The priority of the flow is calculated based on the mask of A, with matches - having larger masks getting higher priorities. + having larger masks getting higher priorities. If the NAT rule is + of type dnat_and_snat and has stateless=true in the + options, then the action would be ip4.src= + (B). A priority-0 logical flow with match 1 has actions @@ -2734,7 +2751,10 @@ nd_ns { logical router gateway port, with an action ct_snat(B);. The priority of the flow is calculated based on the mask of A, with matches - having larger masks getting higher priorities. + having larger masks getting higher priorities. If the NAT rule + is of type dnat_and_snat and has stateless=true + in the options, then the action would be ip4.src= + (B). diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 194e4bf..4431659 100644 --- a/northd/ovn-northd.c +++ b/northd/ovn-northd.c @@ -6500,6 +6500,18 @@ copy_ra_to_sb(struct ovn_port *op, const char *address_mode) smap_destroy(&options); } +static inline bool +lrouter_nat_is_stateless(const struct nbrec_nat *nat) +{ +const char *stateless = smap_get(&nat->options, "stateless"); + +if (stateless && !strcmp(stateless, "true")) { +return true; +} + +return false; +} + static void build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, struct hmap *lflows, struct shash *meter_groups) @@ -7261,6 +7273,7 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, nat = od->nbr->nat[i]; ovs_be32 ip, mask; +bool stateless = lrouter_nat_is_stateless(nat); char *error = ip_parse_masked(nat->external_ip, &ip, &mask); if (error || mask != OVS_BE32_MAX) { @@ -7326,15 +7339,26 @@ build_lrouter_flows(struct hmap *datapaths, struct hmap *ports, if (!od->l3dgw_port) { /* Gateway router. */ ds_clear(&match); +ds_clear(&actions); ds_put
[ovs-dev] [PATCH v3 1/2 ovn] OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless
Adding ovn-nbctl to mark a dnat_and_snat rule as stateless. This configuration will added to "options" column of NAT table. Signed-off-by: Ankur Sharma --- ovn-nb.ovsschema | 6 -- ovn-nb.xml| 5 + tests/ovn-nbctl.at| 29 + utilities/ovn-nbctl.8.xml | 12 +++- utilities/ovn-nbctl.c | 30 +- 5 files changed, 78 insertions(+), 4 deletions(-) diff --git a/ovn-nb.ovsschema b/ovn-nb.ovsschema index 2c87cbb..084305b 100644 --- a/ovn-nb.ovsschema +++ b/ovn-nb.ovsschema @@ -1,7 +1,7 @@ { "name": "OVN_Northbound", -"version": "5.16.0", -"cksum": "923459061 23095", +"version": "5.17.0", +"cksum": "1128988054 23237", "tables": { "NB_Global": { "columns": { @@ -345,6 +345,8 @@ "snat", "dnat_and_snat" ]]}}}, +"options": {"type": {"key": "string", "value": "string", + "min": 0, "max": "unlimited"}}, "external_ids": { "type": {"key": "string", "value": "string", "min": 0, "max": "unlimited"}}}, diff --git a/ovn-nb.xml b/ovn-nb.xml index 8990894..d8f3237 100644 --- a/ovn-nb.xml +++ b/ovn-nb.xml @@ -2297,6 +2297,11 @@ + + Indicates if a dnat_and_snat rule should lead to connection + tracking state or not. + + See External IDs at the beginning of this document. diff --git a/tests/ovn-nbctl.at b/tests/ovn-nbctl.at index 01091dd..acbe73c 100644 --- a/tests/ovn-nbctl.at +++ b/tests/ovn-nbctl.at @@ -516,6 +516,31 @@ dnat_and_snat30.0.0.2 192.168.1.3 snat 30.0.0.1 192.168.1.0/24 ]) +AT_CHECK([ovn-nbctl --bare --columns=options list nat | grep stateless=true| wc -l], [0], +[0 +]) +AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat_and_snat 40.0.0.2 192.168.1.4]) +AT_CHECK([ovn-nbctl --bare --columns=options list nat | grep stateless=true| wc -l], [0], +[1 +]) +AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat 40.0.0.2 192.168.1.4], [1], [], +[ovn-nbctl: stateless is not applicable to dnat or snat types +]) +AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 snat 40.0.0.2 192.168.1.4], [1], [], +[ovn-nbctl: stateless is not applicable to dnat or snat types +]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 40.0.0.2 192.168.1.5], [1], [], +[ovn-nbctl: 40.0.0.2, 192.168.1.5: External ip cannot be shared across stateless and stateful NATs +]) +AT_CHECK([ovn-nbctl lr-nat-add lr0 dnat 40.0.0.2 192.168.1.5], [1], [], +[ovn-nbctl: 40.0.0.2, 192.168.1.5: External ip cannot be shared across stateless and stateful NATs +]) + +AT_CHECK([ovn-nbctl lr-nat-add lr0 snat 40.0.0.3 192.168.1.6]) +AT_CHECK([ovn-nbctl --stateless lr-nat-add lr0 dnat_and_snat 40.0.0.3 192.168.1.7], [1], [], +[ovn-nbctl: 40.0.0.3, 192.168.1.7: External ip cannot be shared across stateless and stateful NATs +]) + dnl Deletes the NATs AT_CHECK([ovn-nbctl lr-nat-del lr0 dnat_and_snat 30.0.0.3], [1], [], [ovn-nbctl: no matching NAT with the type (dnat_and_snat) and external_ip (30.0.0.3) @@ -533,14 +558,18 @@ AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC LOGICAL_PORT dnat 30.0.0.1 192.168.1.2 dnat_and_snat30.0.0.2 192.168.1.3 +dnat_and_snat40.0.0.2 192.168.1.4 snat 30.0.0.1 192.168.1.0/24 +snat 40.0.0.3 192.168.1.6 ]) AT_CHECK([ovn-nbctl lr-nat-del lr0 dnat]) AT_CHECK([ovn-nbctl lr-nat-list lr0], [0], [dnl TYPE EXTERNAL_IPLOGICAL_IPEXTERNAL_MAC LOGICAL_PORT dnat_and_snat30.0.0.2 192.168.1.3 +dnat_and_snat40.0.0.2 192.168.1.4 snat 30.0.0.1 192.168.1.0/24 +snat 40.0.0.3 192.168.1.6 ]) AT_CHECK([ovn-nbctl lr-nat-del lr0]) diff --git a/utilities/ovn-nbctl.8.xml b/utilities/ovn-nbctl.8.xml index fd75c0e..2161d8c 100644 --- a/utilities/ovn-nbctl.8.xml +++ b/utilities/ovn-nbctl.8.xml @@ -665,7 +665,7 @@ NAT Commands - [--may-exist] lr-nat-add router type external_ip logical_ip [logical_port external_mac] + [--may-exist] [--stateless]lr-nat-add router type external_ip logical_ip [logical_port external_mac] Adds the specified NAT to router. @@ -681,8 +681,18 @@ The logical_port is the name of an existing logical switch port where the logical_ip resides. The external_mac is an Ethernet address. + The --stateless + When --stateless is specified then it implies that + we will be not use
[ovs-dev] [PATCH v3 0/2] ALLOW Stateless NAT operations
NAT implementation in OVN uses connection tracker to replace source and dest ips. This implementation works fine and is the right approach for cases where external ips are shared (i.e. SNAT) or where we replace ip only when relevant flow is there (i.e. DNAT). However, it opens the possibility of Dos Attack, where attacker can easily simluate multiple 5 tuples, to consume the connection tracker entry in an OVN chassis. This way they can easily attain the CT limit, there by impacting the usage of it by other features like valid NAT, ACL etc. This attack is even worse, when external ip is a public ip, i.e internet routable ip. In this patch we are introducing an option with NAT table entry. Option "stateless=true" indicates that NAT implmentation should not be using CT, i.e it should not use ct_snat/dnat actions. Instead of ct_* actions, we will use ip4.src/dst OVN actions, which will replace source and destination ips, while recalculating the checksums. This option is applicable only for the NAT rules which can be 1:1 mapped between inner and external ips, i.e dnat_and_snat rule. Signed-off-by: Ankur Sharma Ankur Sharma (2): OVN: ADD nbctl cli to mark a dnat_and_snat rule as stateless OVN: Use ip4.src and ip4.dst actions for NAT rules northd/ovn-northd.8.xml | 34 +++-- northd/ovn-northd.c | 86 +++-- ovn-nb.ovsschema | 6 +- ovn-nb.xml| 5 + tests/ovn-nbctl.at| 29 + tests/ovn-northd.at | 57 + tests/ovn.at | 311 ++ utilities/ovn-nbctl.8.xml | 12 +- utilities/ovn-nbctl.c | 30 - 9 files changed, 550 insertions(+), 20 deletions(-) -- 1.8.3.1 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v2] travis: support ppc64le builds
Add support for travis-ci ppc64le builds. - Updated matrix in .travis.yml to include a ppc64le build. - Move package install needed for 32bit builds to .travis/linux-prepare.sh. To keep the total build time at an acceptable level only a single build job is included in the matrix for ppc64le. A build report example can be found here [1] [0] http://travis-ci.org/ [1] https://travis-ci.org/djlwilder/ovs/builds/604615966 Signed-off-by: David Wilder --- .travis.yml | 5 +++-- .travis/linux-prepare.sh | 5 - 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5676d9748..c99f26815 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ addons: apt: packages: - bc - - gcc-multilib - libssl-dev - llvm-dev - libjemalloc1 @@ -24,7 +23,6 @@ addons: - libelf-dev - selinux-policy-dev - libunbound-dev - - libunbound-dev:i386 - libunwind-dev before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh @@ -50,6 +48,9 @@ matrix: - os: osx compiler: clang env: OPTS="--disable-ssl" +- os: linux-ppc64le + compiler: gcc + env: OPTS="--disable-ssl" script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh index e546d32cb..299f9de99 100755 --- a/.travis/linux-prepare.sh +++ b/.travis/linux-prepare.sh @@ -18,5 +18,8 @@ pip install --user --upgrade docutils if [ "$M32" ]; then # 32-bit and 64-bit libunwind can not be installed at the same time. # This will remove the 64-bit libunwind and install 32-bit version. -sudo apt-get install -y libunwind-dev:i386 +sudo apt-get install -y \ + gcc-multilib \ + libunwind-dev:i386 \ + libunbound-dev:i386 fi -- 2.23.0.162.gf1d4a28 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table
On Tue, Oct 29, 2019 at 4:31 AM Tonghao Zhang wrote: > > On Tue, Oct 29, 2019 at 3:38 PM Pravin Shelar wrote: > > > > On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang > > wrote: > > > > > > On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar wrote: > > > > > > > > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang > > > > wrote: > > > > > > > > > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar wrote: > > > > > > > > > > ... > > > > > > ... > > > > > struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, > > > > > @@ -400,10 +458,9 @@ static struct table_instance > > > > > *table_instance_rehash(struct table_instance *ti, > > > > > return new_ti; > > > > > } > > > > > > > > > > -int ovs_flow_tbl_flush(struct flow_table *flow_table) > > > > > +int ovs_flow_tbl_flush(struct flow_table *table) > > > > > { > > > > > - struct table_instance *old_ti, *new_ti; > > > > > - struct table_instance *old_ufid_ti, *new_ufid_ti; > > > > > + struct table_instance *new_ti, *new_ufid_ti; > > > > > > > > > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS); > > > > > if (!new_ti) > > > > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table > > > > > *flow_table) > > > > > if (!new_ufid_ti) > > > > > goto err_free_ti; > > > > > > > > > > - old_ti = ovsl_dereference(flow_table->ti); > > > > > - old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); > > > > > + table_instance_destroy(table, true); > > > > > > > > > This would destroy running table causing unnecessary flow miss. Lets > > > > keep current scheme of setting up new table before destroying current > > > > one. > > > > > > > > > - rcu_assign_pointer(flow_table->ti, new_ti); > > > > ... > > > /* Must be called with OVS mutex held. */ > > > void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > > > { > > > @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table > > > *table, struct sw_flow *flow) > > > struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); > > > > > > BUG_ON(table->count == 0); > > > - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); > > > - table->count--; > > > - if (ovs_identifier_is_ufid(&flow->id)) { > > > - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); > > > - table->ufid_count--; > > > - } > > > - > > > - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should > > > be > > > -* accessible as long as the RCU read lock is held. > > > -*/ > > > - flow_mask_remove(table, flow->mask); > > > + table_instance_remove(table, ti, ufid_ti, flow, true); > > > } > > Lets rename table_instance_remove() to imply it is freeing a flow. > hi Pravin, the function ovs_flow_free will free the flow actually. In > -ovs_flow_cmd_del > ovs_flow_tbl_remove > ... > ovs_flow_free > > In -table_instance_destroy > table_instance_remove > ovs_flow_free > > But if rename the table_instance_remove, table_instance_flow_free ? table_instance_flow_free() looks good. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] Comunicación efectiva y empática del coach
26 de Noviembre | Horario de 10:00 a 17:00 hrs. | (hora del centro de México) - Coaching y liderazgo efectivo. - ¿De qué hablaremos? Este webinar tiene como finalidad llevar a los participantes a ser capaces de intervenir positivamente en otros individuos en funciones de liderazgo, comprendiendo diversos estilos y aplicando en la práctica acciones que permitan mejorar la toma de decisiones para alcanzar los objetivos de su organización. ¿Qué aprenderás?: - Diferencia entre liderazgo y dirección. - Definiciones del Coaching. - Beneficios del Coaching. - Qué no es Coaching? - Las 6 C’s del coaching exitoso. - Técnicas de coaching de liderazgo efectivo. - Comunicación efectiva y empática del coach. - Habilidades del Coach. Solicita información respondiendo a este correo con la palabra Coaching, junto con los siguientes datos: Nombre: Correo electrónico: Número telefónico: Dirigido a: Empresarios, gerentes, líderes de proyecto, jefes de área, supervisores y todo aquel que tenga contacto con el personal de forma directa o indirecta y busque mejorar la capacidad de influir en los demás. Números de Atención: (045) 55 15 54 66 30 - (045) 55 85567293 - (045) 5530167085 En caso de que haya recibido este correo sin haberlo solicitado o si desea dejar de recibir nuestra promoción favor de responder con la palabra baja o enviar un correo a bajas@ innovalearn.net ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCHv6] netdev-afxdp: Add need_wakeup supprt.
On 29.10.2019 0:32, William Tu wrote: On Mon, Oct 28, 2019 at 12:11 PM Ilya Maximets wrote: On 28.10.2019 11:46, Eelco Chaudron wrote: On 23 Oct 2019, at 23:06, William Tu wrote: The patch adds support for using need_wakeup flag in AF_XDP rings. A new option, use_need_wakeup, is added. When this option is used, it means that OVS has to explicitly wake up the kernel RX, using poll() syscall and wake up TX, using sendto() syscall. This feature improves the performance by avoiding unnecessary sendto syscalls for TX. For RX, instead of kernel always busy-spinning on fille queue, OVS wakes up the kernel RX processing when fill queue is replenished. The need_wakeup feature is merged into Linux kernel bpf-next tee with commit 77cd0d7b3f25 ("xsk: add support for need_wakeup flag in AF_XDP rings") and OVS enables it by default, if libbpf supports it. If users enable it but runs in an older version of libbpf, then the need_wakeup feature has no effect, and a warning message is logged. For virtual interface, it's better set use_need_wakeup=false, since the virtual device's AF_XDP xmit is synchronous: the sendto syscall enters kernel and process the TX packet on tx queue directly. On Intel Xeon E5-2620 v3 2.4GHz system, performance of physical port to physical port improves from 6.1Mpps to 7.3Mpps. Suggested-by: Ilya Maximets Signed-off-by: William Tu Reviewed based on diff from previous version, also did quick compile test with and without need_wakeup supported kernel. No actual performance tests ran, as my setup is in a messed up state :( One small issue (guess can be fixed at apply time), the subject mentions “supprt”, it’s missing an o. Acked-by: Eelco Chaudron Thanks, I could fix the 'supprt' while applying the patch. But I have one more question. William, Eelco, what do you think about renaming the option itself from "options:use_need_wakeup" to "options:use-need-wakeup"? Most of the netdev options for netdev-dpdk and netdev-linux except few of them (e.g. n_rxq, n_rxq_desc) uses dashes in their names instead of underscores. I agree that right now there is a mess in OVS and there is no specified convention for options naming, but it might be good idea to name all the new options in a similar way, to keep the API more or less consistent. What do you think? I could squash in below diff while applying the patch (I also added a NEWS entry): Hi Ilya, Yes, I'm ok with using dashes. diff below looks good to me. Thanks William Thanks, William and Eelco! I fixed a typo in the subject line, squashed the diff and applied to master. Best regards, Ilya Maximets. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ] travis: support ppc64le builds
On 2019-10-29 09:52, Ilya Maximets wrote: On 28.10.2019 22:22, David Wilder wrote: Add support for travis-ci ppc64le builds. - Updated matrix in .travis.yml to include a ppc64le build. - Added support to install packages needed by specific architectures. To keep the total build time at an acceptable level only a single build job is included in the matrix for ppc64le. A build report example can be found here [1] [0] https://urldefense.proofpoint.com/v2/url?u=http-3A__travis-2Dci.org_&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=UMYL8rzJNph87seC0oJLBiWoe-sUSL80AJy0RMTgBzQ&e= [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__travis-2Dci.org_djlwilder_ovs_builds_604098141&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=pyd2yQpQ0snpwGE5El4RYZsatwl74sthM1KLqtIKCnY&e= Signed-off-by: David Wilder Hi David, Thanks for working on this. I have a couple of question regarding ppc64le support by TravisCI. It seems that they are not supporting this architecture officially and refusing[1] to solve any issues that appears while using it. There also no official documentation. It's kind of a hidden feature that some projects are using for their own risk [2]. Do you know why this happens or maybe you have some insights about what is going on/how it works? Work is going on to increase ppc64le support on Travis by the end of the year. I dont have any details yet. My plan is to keep this to build-only ci until then. Important, ppc64le VM are only available on travis-ci.org, they are not available on travis-ci.com. The API is also a bit strange because Travis started to officially support arm builds, but this is done via 'arch' knob, not the 'os'. Will it be changed over time for ppc64le? Sorry, I dont know. [1] https://urldefense.proofpoint.com/v2/url?u=https-3A__travis-2Dci.community_t_ppc64le-2Darch-2Dsupport-2Don-2Dtravis-2Dci-2Dcom-2Dvs-2Dtravis-2Dci-2Dorg_2898_2&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=TrXdSxjvnbbVQz7EzR5r0aE93lZMSdCiIUQT2wt8E3I&e= [2] https://urldefense.proofpoint.com/v2/url?u=https-3A__github.com_openssl_openssl_commit_13da3ad00c80e1da816ca27f6c15b0ecee1bb0b8&d=DwICaQ&c=jf_iaSHvJObTbx-siA1ZOg&r=7ndxyKjH9UrBD68Us2WP1wI4BwEBQbeAyz8i_vwCCaI&m=6JANehIfGoxUMtwHhe4yob4UPeby0Y8ovgzRDIyJZFo&s=RWVuli-BT8E2IsW3rAA9MtqCVPZahNk8k7yqxEbgTT4&e= Few code comments inline. --- .travis.yml | 5 +++-- .travis/linux-prepare.sh | 18 ++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5676d9748..c99f26815 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ addons: apt: packages: - bc - - gcc-multilib - libssl-dev - llvm-dev - libjemalloc1 @@ -24,7 +23,6 @@ addons: - libelf-dev - selinux-policy-dev - libunbound-dev - - libunbound-dev:i386 - libunwind-dev before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh @@ -50,6 +48,9 @@ matrix: - os: osx compiler: clang env: OPTS="--disable-ssl" +- os: linux-ppc64le + compiler: gcc + env: OPTS="--disable-ssl" script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh index e546d32cb..f3a9a6d44 100755 --- a/.travis/linux-prepare.sh +++ b/.travis/linux-prepare.sh @@ -15,8 +15,18 @@ cd .. pip install --disable-pip-version-check --user six flake8 hacking pip install --user --upgrade docutils -if [ "$M32" ]; then -# 32-bit and 64-bit libunwind can not be installed at the same time. -# This will remove the 64-bit libunwind and install 32-bit version. -sudo apt-get install -y libunwind-dev:i386 +# Include packages needed by specific architectures. +if [ $TRAVIS_ARCH == amd64 ]; then + sudo apt-get install -y \ +libunbound-dev:i386 \ +gcc-multilib These packages are only needed for 32bit build, so you may just move them to the command that installs 32bit version of libunwind. And since you're not building 32bit with ppc64le, the code could look like: if [ "$M32" ]; then # install 32 bit libs fi if [ $TRAVIS_ARCH == ppc64le ]; then # install ppc64le specific things. fi Agreed. + +if [ "$M32" ]; then + # 32-bit and 64-bit libunwind can not be installed at the same time. + # This will remove the 64-bit libunwind and install 32-bit version. + sudo apt-get install -y libunwind-dev:i386 +fi + +elif [ $TRAVIS_ARCH == ppc64le ]; then + sudo apt-get install -y flex Why 'flex' is needed? I found that Flex was needed for kernel builds, but since I am not doing that yet I can remove it. Thanks for the review, I will post a V2 patch shortly. Best regar
Re: [ovs-dev] [PATCH ] travis: support ppc64le builds
On 28.10.2019 22:22, David Wilder wrote: Add support for travis-ci ppc64le builds. - Updated matrix in .travis.yml to include a ppc64le build. - Added support to install packages needed by specific architectures. To keep the total build time at an acceptable level only a single build job is included in the matrix for ppc64le. A build report example can be found here [1] [0] http://travis-ci.org/ [1] https://travis-ci.org/djlwilder/ovs/builds/604098141 Signed-off-by: David Wilder Hi David, Thanks for working on this. I have a couple of question regarding ppc64le support by TravisCI. It seems that they are not supporting this architecture officially and refusing[1] to solve any issues that appears while using it. There also no official documentation. It's kind of a hidden feature that some projects are using for their own risk [2]. Do you know why this happens or maybe you have some insights about what is going on/how it works? The API is also a bit strange because Travis started to officially support arm builds, but this is done via 'arch' knob, not the 'os'. Will it be changed over time for ppc64le? [1] https://travis-ci.community/t/ppc64le-arch-support-on-travis-ci-com-vs-travis-ci-org/2898/2 [2] https://github.com/openssl/openssl/commit/13da3ad00c80e1da816ca27f6c15b0ecee1bb0b8 Few code comments inline. --- .travis.yml | 5 +++-- .travis/linux-prepare.sh | 18 ++ 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index 5676d9748..c99f26815 100644 --- a/.travis.yml +++ b/.travis.yml @@ -14,7 +14,6 @@ addons: apt: packages: - bc - - gcc-multilib - libssl-dev - llvm-dev - libjemalloc1 @@ -24,7 +23,6 @@ addons: - libelf-dev - selinux-policy-dev - libunbound-dev - - libunbound-dev:i386 - libunwind-dev before_install: ./.travis/${TRAVIS_OS_NAME}-prepare.sh @@ -50,6 +48,9 @@ matrix: - os: osx compiler: clang env: OPTS="--disable-ssl" +- os: linux-ppc64le + compiler: gcc + env: OPTS="--disable-ssl" script: ./.travis/${TRAVIS_OS_NAME}-build.sh $OPTS diff --git a/.travis/linux-prepare.sh b/.travis/linux-prepare.sh index e546d32cb..f3a9a6d44 100755 --- a/.travis/linux-prepare.sh +++ b/.travis/linux-prepare.sh @@ -15,8 +15,18 @@ cd .. pip install --disable-pip-version-check --user six flake8 hacking pip install --user --upgrade docutils -if [ "$M32" ]; then -# 32-bit and 64-bit libunwind can not be installed at the same time. -# This will remove the 64-bit libunwind and install 32-bit version. -sudo apt-get install -y libunwind-dev:i386 +# Include packages needed by specific architectures. +if [ $TRAVIS_ARCH == amd64 ]; then + sudo apt-get install -y \ +libunbound-dev:i386 \ +gcc-multilib These packages are only needed for 32bit build, so you may just move them to the command that installs 32bit version of libunwind. And since you're not building 32bit with ppc64le, the code could look like: if [ "$M32" ]; then # install 32 bit libs fi if [ $TRAVIS_ARCH == ppc64le ]; then # install ppc64le specific things. fi + +if [ "$M32" ]; then + # 32-bit and 64-bit libunwind can not be installed at the same time. + # This will remove the 64-bit libunwind and install 32-bit version. + sudo apt-get install -y libunwind-dev:i386 +fi + +elif [ $TRAVIS_ARCH == ppc64le ]; then + sudo apt-get install -y flex Why 'flex' is needed? Best regards, Ilya Maximets. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] Cómo hacer una presentación profesional
Me da mucho gusto saludarte. Es, para mí, un placer poder invitarte a nuestro Curso en Línea "¿Cómo presentar resultados a nivel profesional?", que se estará llevando a cabo el día Miércoles 13 de Noviembre con un horario de 10:00 a 14:00 hrs. (hora del centro de México) Presentar un proyecto a nivel profesional utilizando las herramientas adecuadas y teniendo una planeación óptima. Ejes Temáticos: - ¿Cómo presentar un proyecto? - Elaboración de proyectos. - Etapas de un proyecto. - Herramientas a utilizar en una presentación. - Organización y control de tiempos. - Como hacer una propuesta de trabajo profesional. - Como presentar resultados a nivel profesional. Teléfonos: (045) 55 15 54 66 30 - (045) 55 85567293 - (045) 5530167085 Solicita información respondiendo a este correo con la palabra Power, junto con los siguientes datos: Nombre: Correo electrónico: Número telefónico: Qué tengas un excelente día Si desea dejar de recibir nuestra promoción favor de responder con la palabra baja o enviar un correo a bajas@ innovalearn.net ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCHv5] netdev-linux: Detect numa node id.
On 23.10.2019 23:08, William Tu wrote: The patch detects the numa node id from the name of the netdev, by reading the '/sys/class/net//device/numa_node'. If not available, ex: virtual device, or any error happens, return numa id 0. Currently only the afxdp netdev type uses it, other linux netdev types are disabled due to no use case. Signed-off-by: William Tu --- v5: Feedbacks from Ilya - reafactor the error handling - add mutex lock - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/601947245 v4: Feedbacks from Eelco - Tested-at: https://travis-ci.org/williamtu/ovs-travis/builds/599308893 v3: Feedbacks from Ilya and Eelco - update doc, afxdp.rst - fix coding style - fix limit of numa node max, by using ovs_numa_numa_id_is_valid - move the function to netdev-linux - cache the result of numa_id - add security check for netdev name - use fscanf instead of read and convert to int - revise some error message content v2: address feedback from Eelco fix memory leak of xaspintf log using INFO instead of WARN --- Documentation/intro/install/afxdp.rst | 1 - lib/netdev-afxdp.c| 11 -- lib/netdev-linux-private.h| 2 ++ lib/netdev-linux.c| 63 ++- 4 files changed, 64 insertions(+), 13 deletions(-) diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index 820e9d993d8f..6c00c4ad1356 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -309,7 +309,6 @@ Below is a script using namespaces and veth peer:: Limitations/Known Issues -#. Device's numa ID is always 0, need a way to find numa id from a netdev. #. No QoS support because AF_XDP netdev by-pass the Linux TC layer. A possible work-around is to use OpenFlow meter action. #. Most of the tests are done using i40e single port. Multiple ports and diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 8eb270c150e8..cfd93fab9f45 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -543,17 +543,6 @@ out: return err; } -int -netdev_afxdp_get_numa_id(const struct netdev *netdev) -{ -/* FIXME: Get netdev's PCIe device ID, then find - * its NUMA node id. - */ -VLOG_INFO("FIXME: Device %s always use numa id 0.", - netdev_get_name(netdev)); -return 0; -} - static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode) { diff --git a/lib/netdev-linux-private.h b/lib/netdev-linux-private.h index a350be151147..c8f2be47b10b 100644 --- a/lib/netdev-linux-private.h +++ b/lib/netdev-linux-private.h @@ -96,6 +96,8 @@ struct netdev_linux { /* LAG information. */ bool is_lag_master; /* True if the netdev is a LAG master. */ +int numa_id;/* NUMA node id. */ + #ifdef HAVE_AF_XDP /* AF_XDP information. */ struct xsk_socket_info **xsks; diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index f4819237370a..4dd05493b238 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -236,6 +236,7 @@ enum { VALID_VPORT_STAT_ERROR = 1 << 5, VALID_DRVINFO = 1 << 6, VALID_FEATURES = 1 << 7, +VALID_NUMA_ID = 1 << 8, }; struct linux_lag_slave { @@ -1391,6 +1392,66 @@ netdev_linux_tap_batch_send(struct netdev *netdev_, return 0; } +static int +netdev_linux_get_numa_id__(const struct netdev *netdev_) Since you've created a separate function, I think we need to add a thread safety annotation here: OVS_REQUIRES(netdev->mutex) For this purpose, you'll better pass the instance of 'netdev_linux' in this function. Original netdev only used to get the netdev name, so it'll be not hard to use &netdev->up instead for that purpose. Second thing is that we should preserve numa cache on netlink updates. Something like this: diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c index 4dd05493b..0495a035f 100644 --- a/lib/netdev-linux.c +++ b/lib/netdev-linux.c @@ -821,9 +821,9 @@ netdev_linux_update__(struct netdev_linux *dev, { if (rtnetlink_type_is_rtnlgrp_link(change->nlmsg_type)) { if (change->nlmsg_type == RTM_NEWLINK) { -/* Keep drv-info, and ip addresses. */ +/* Keep drv-info, ip addresses and NUMA id. */ netdev_linux_changed(dev, change->ifi_flags, - VALID_DRVINFO | VALID_IN); + VALID_DRVINFO | VALID_IN | VALID_NUMA_ID); /* Update netdev from rtnl-change msg. */ if (change->mtu) { --- Without this change we're re-checking the numa id after each time device flags/mtu/whatever updated. Best regards, Ilya Maximets. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures
On Tue, Oct 29, 2019 at 8:16 PM Russell Bryant wrote: > > Thanks, I just hit this and the patch fixes it for me. > > Acked-by: Russell Bryant Thanks for the reviews. I applied this patch to master. Numan > > On Tue, Oct 29, 2019 at 9:56 AM Han Zhou wrote: > > > > Acked-by: hz...@ovn.org > > > > On Tue, Oct 29, 2019 at 5:26 AM wrote: > > > > > From: Numan Siddique > > > > > > The commit b740928656a1("testsuite: Use ovn-macros instead of > > > ofproto-macros.") > > > missed updating the system test suite files to include ovn-macros.at. This > > > patch adds it. > > > > > > CC: Han Zhou > > > Signed-off-by: Numan Siddique > > > --- > > > tests/system-kmod-testsuite.at | 1 + > > > tests/system-userspace-testsuite.at | 1 + > > > 2 files changed, 2 insertions(+) > > > > > > diff --git a/tests/system-kmod-testsuite.at b/tests/ > > > system-kmod-testsuite.at > > > index 6c8478093..2ccd9f1ce 100644 > > > --- a/tests/system-kmod-testsuite.at > > > +++ b/tests/system-kmod-testsuite.at > > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > > > m4_include([tests/ovs-macros.at]) > > > m4_include([tests/ovsdb-macros.at]) > > > m4_include([tests/ofproto-macros.at]) > > > +m4_include([tests/ovn-macros.at]) > > > m4_include([tests/system-common-macros.at]) > > > m4_include([tests/system-kmod-macros.at]) > > > > > > diff --git a/tests/system-userspace-testsuite.at b/tests/ > > > system-userspace-testsuite.at > > > index 784eedd2c..4022ae620 100644 > > > --- a/tests/system-userspace-testsuite.at > > > +++ b/tests/system-userspace-testsuite.at > > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > > > m4_include([tests/ovs-macros.at]) > > > m4_include([tests/ovsdb-macros.at]) > > > m4_include([tests/ofproto-macros.at]) > > > +m4_include([tests/ovn-macros.at]) > > > m4_include([tests/system-userspace-macros.at]) > > > m4_include([tests/system-common-macros.at]) > > > > > > -- > > > 2.21.0 > > > > > > > > ___ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > > > > -- > Russell Bryant > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCHv2] netdev-afxdp: Enable loading XDP program.
See comments inline… On 5 Oct 2019, at 2:12, William Tu wrote: Now netdev-afxdp always forwards all packets to userspace because it is using libbpf's default XDP program, see 'xsk_load_xdp_prog'. There are some cases when users want to keep packets in kernel instead of sending to userspace, for example, management traffic such as SSH should be processed in kernel. The patch enables loading the user-provide XDP program by doing $ovs-vsctl -- set int afxdp-p0 options:xdpobj= Should we maybe change the name to xdp-obj? So users can implement their filtering logic or traffic steering idea in their XDP program, and rest of the traffic passes to AF_XDP socket handled by OVS. Signed-off-by: William Tu --- v2: A couple fixes and remove RFC --- Documentation/intro/install/afxdp.rst | 46 lib/netdev-afxdp.c| 137 ++ lib/netdev-linux-private.h| 2 + 3 files changed, 169 insertions(+), 16 deletions(-) diff --git a/Documentation/intro/install/afxdp.rst b/Documentation/intro/install/afxdp.rst index 820e9d993d8f..f484bdb8249b 100644 --- a/Documentation/intro/install/afxdp.rst +++ b/Documentation/intro/install/afxdp.rst @@ -265,6 +265,52 @@ Or, use OVS pmd tool:: ovs-appctl dpif-netdev/pmd-stats-show +Loading Custom XDP Program +-- +By defailt, netdev-afxdp always forwards all packets to userspace because +it is using libbpf's default XDP program. There are some cases when users +want to keep packets in kernel instead of sending to userspace, for example, +management traffic such as SSH should be processed in kernel. This can be +done by loading the user-provided XDP program:: + + ovs-vsctl -- set int afxdp-p0 options:xdpobj= + +So users can implement their filtering logic or traffic steering idea +in their XDP program, and rest of the traffic passes to AF_XDP socket +handled by OVS. Below is a sample C program compiled under kernel's +samples/bpf/. + +.. code-block:: c + + #include + #include "bpf_helpers.h" + + /* OVS will associate map 'xsks_map' to xsk socket. */ + struct bpf_map_def SEC("maps") xsks_map = { + .type = BPF_MAP_TYPE_XSKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 32, + }; + Maybe something like this could be added to make sure it compiles with older kernels: #if LINUX_VERSION_CODE < KERNEL_VERSION(5,3,0) /* Kernel version before 5.3 needed an additional map */ struct bpf_map_def SEC("maps") qidconf_map = { .type = BPF_MAP_TYPE_ARRAY, .key_size = sizeof(int), .value_size = sizeof(int), .max_entries = 64, }; #endi See also here: https://github.com/chaudron/xdp-tutorial/blob/master/advanced03-AF_XDP/af_xdp_kern.c + SEC("xdp_sock") + int xdp_sock_prog(struct xdp_md *ctx) + { + int index = ctx->rx_queue_index; + + /* Customized by user. + * For example + * 1) filter out all SSH traffic and return XDP_PASS + *for kernel to process. + * 2) Drop unwanted packet by returning XDP_DROP. + */ + + /* Rest of packets goes to AF_XDP. */ + return bpf_redirect_map(&xsks_map, index, 0); + } + char _license[] SEC("license") = "GPL"; + + Example Script -- diff --git a/lib/netdev-afxdp.c b/lib/netdev-afxdp.c index 6e01803272aa..231d6473fb67 100644 --- a/lib/netdev-afxdp.c +++ b/lib/netdev-afxdp.c @@ -21,6 +21,7 @@ #include "netdev-afxdp.h" #include "netdev-afxdp-pool.h" +#include #include #include #include @@ -29,6 +30,7 @@ #include #include #include +#include #include #include @@ -82,7 +84,7 @@ BUILD_ASSERT_DECL(PROD_NUM_DESCS == CONS_NUM_DESCS); #define UMEM2DESC(elem, base) ((uint64_t)((char *)elem - (char *)base)) static struct xsk_socket_info *xsk_configure(int ifindex, int xdp_queue_id, - int mode); + int mode, bool use_default_xdp); static void xsk_remove_xdp_program(uint32_t ifindex, int xdpmode); static void xsk_destroy(struct xsk_socket_info *xsk); static int xsk_configure_all(struct netdev *netdev); @@ -158,6 +160,51 @@ netdev_afxdp_sweep_unused_pools(void *aux OVS_UNUSED) ovs_mutex_unlock(&unused_pools_mutex); } +static int +xsk_load_prog(const char *path, struct bpf_object **obj, + int *prog_fd) +{ +struct bpf_prog_load_attr attr = { +.prog_type = BPF_PROG_TYPE_XDP, +.file = path, +}; + +if (bpf_prog_load_xattr(&attr, obj, prog_fd)) { +VLOG_WARN("Can't load XDP program at '%s'", path); I think it should be a VLOG_ERR() +return EINVAL; +} + +VLOG_INFO("Loaded XDP program at '%s'", path); +return 0; +} + +static int +xsk_configure_map(struct bpf_object *obj, struct xsk_socket_info *xsk, + int queue_id) +{ +struct bpf_map *map; +int xsks_map_fd; +int xsk_fd; +int ret; + +
[ovs-dev] [PATCH ovn] ovn-controller: Fix use of dangling pointers in I-P runtime_data.
The incremental processing engine might stop a run before the en_runtime_data node is processed. In such cases the ed_runtime_data fields might contain pointers to already deleted SB records. For example, if a port binding corresponding to a patch port is removed from the SB database and the incremental processing engine aborts before the en_runtime_data node is processed then the corresponding local_datapath hashtable entry in ed_runtime_data is stale and will store a pointer to the already freed sbrec_port_binding record. This will cause invalid memory accesses in various places (e.g., pinctrl_run() -> prepare_ipv6_ras()). To fix the issue we need a way to track how each node was processed during an engine run. This commit transforms the 'changed' field in struct engine_node in a 'state' field. Possible node states are: - "Stale": data in the node is not up to date with the DB. - "Updated": data in the node is valid but was updated during the last run of the engine. - "Valid": data in the node is valid and didn't change during the last run of the engine. - "Aborted": during the last run, processing was aborted for this node. The commit also simplifies the logic of calling engine_run and engine_need_run in order to reduce the number of external variables required to track the result of the last engine execution. Functions that need to be called from the main loop and depend on various data contents of the engine's nodes are now called only if the data is up to date. CC: Han Zhou Fixes: a6b7d9f4f047 ("ovn-controller: Fix flow installation latency caused by recompute.") Signed-off-by: Dumitru Ceara --- controller/ovn-controller.c | 144 +--- lib/inc-proc-eng.c | 117 +-- lib/inc-proc-eng.h | 47 --- 3 files changed, 221 insertions(+), 87 deletions(-) diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c index 9ab98be..8e07d5e 100644 --- a/controller/ovn-controller.c +++ b/controller/ovn-controller.c @@ -758,10 +758,10 @@ en_ofctrl_is_connected_run(struct engine_node *node) (struct ed_type_ofctrl_is_connected *)node->data; if (data->connected != ofctrl_is_connected()) { data->connected = !data->connected; -node->changed = true; +engine_set_node_state(node, EN_UPDATED); return; } -node->changed = false; +engine_set_node_state(node, EN_VALID); } struct ed_type_addr_sets { @@ -811,7 +811,7 @@ en_addr_sets_run(struct engine_node *node) addr_sets_init(as_table, &as->addr_sets); as->change_tracked = false; -node->changed = true; +engine_set_node_state(node, EN_UPDATED); } static bool @@ -830,11 +830,14 @@ addr_sets_sb_address_set_handler(struct engine_node *node) addr_sets_update(as_table, &as->addr_sets, &as->new, &as->deleted, &as->updated); -node->changed = !sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) -|| !sset_is_empty(&as->updated); +if (!sset_is_empty(&as->new) || !sset_is_empty(&as->deleted) || +!sset_is_empty(&as->updated)) { +engine_set_node_state(node, EN_UPDATED); +} else { +engine_set_node_state(node, EN_VALID); +} as->change_tracked = true; -node->changed = true; return true; } @@ -885,7 +888,7 @@ en_port_groups_run(struct engine_node *node) port_groups_init(pg_table, &pg->port_groups); pg->change_tracked = false; -node->changed = true; +engine_set_node_state(node, EN_UPDATED); } static bool @@ -904,11 +907,14 @@ port_groups_sb_port_group_handler(struct engine_node *node) port_groups_update(pg_table, &pg->port_groups, &pg->new, &pg->deleted, &pg->updated); -node->changed = !sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) -|| !sset_is_empty(&pg->updated); +if (!sset_is_empty(&pg->new) || !sset_is_empty(&pg->deleted) || +!sset_is_empty(&pg->updated)) { +engine_set_node_state(node, EN_UPDATED); +} else { +engine_set_node_state(node, EN_VALID); +} pg->change_tracked = true; -node->changed = true; return true; } @@ -1091,7 +1097,7 @@ en_runtime_data_run(struct engine_node *node) update_ct_zones(local_lports, local_datapaths, ct_zones, ct_zone_bitmap, pending_ct_zones); -node->changed = true; +engine_set_node_state(node, EN_UPDATED); } static bool @@ -1157,10 +1163,10 @@ en_mff_ovn_geneve_run(struct engine_node *node) enum mf_field_id mff_ovn_geneve = ofctrl_get_mf_field_id(); if (data->mff_ovn_geneve != mff_ovn_geneve) { data->mff_ovn_geneve = mff_ovn_geneve; -node->changed = true; +engine_set_node_state(node, EN_UPDATED); return; } -node->changed = false; +engine_set_node_state(node, EN_VALID); } struct ed_type_flow_output
Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures
Thanks, I just hit this and the patch fixes it for me. Acked-by: Russell Bryant On Tue, Oct 29, 2019 at 9:56 AM Han Zhou wrote: > > Acked-by: hz...@ovn.org > > On Tue, Oct 29, 2019 at 5:26 AM wrote: > > > From: Numan Siddique > > > > The commit b740928656a1("testsuite: Use ovn-macros instead of > > ofproto-macros.") > > missed updating the system test suite files to include ovn-macros.at. This > > patch adds it. > > > > CC: Han Zhou > > Signed-off-by: Numan Siddique > > --- > > tests/system-kmod-testsuite.at | 1 + > > tests/system-userspace-testsuite.at | 1 + > > 2 files changed, 2 insertions(+) > > > > diff --git a/tests/system-kmod-testsuite.at b/tests/ > > system-kmod-testsuite.at > > index 6c8478093..2ccd9f1ce 100644 > > --- a/tests/system-kmod-testsuite.at > > +++ b/tests/system-kmod-testsuite.at > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > > m4_include([tests/ovs-macros.at]) > > m4_include([tests/ovsdb-macros.at]) > > m4_include([tests/ofproto-macros.at]) > > +m4_include([tests/ovn-macros.at]) > > m4_include([tests/system-common-macros.at]) > > m4_include([tests/system-kmod-macros.at]) > > > > diff --git a/tests/system-userspace-testsuite.at b/tests/ > > system-userspace-testsuite.at > > index 784eedd2c..4022ae620 100644 > > --- a/tests/system-userspace-testsuite.at > > +++ b/tests/system-userspace-testsuite.at > > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > > m4_include([tests/ovs-macros.at]) > > m4_include([tests/ovsdb-macros.at]) > > m4_include([tests/ofproto-macros.at]) > > +m4_include([tests/ovn-macros.at]) > > m4_include([tests/system-userspace-macros.at]) > > m4_include([tests/system-common-macros.at]) > > > > -- > > 2.21.0 > > > > > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev -- Russell Bryant ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn] Fix system-ovn test failures
Acked-by: hz...@ovn.org On Tue, Oct 29, 2019 at 5:26 AM wrote: > From: Numan Siddique > > The commit b740928656a1("testsuite: Use ovn-macros instead of > ofproto-macros.") > missed updating the system test suite files to include ovn-macros.at. This > patch adds it. > > CC: Han Zhou > Signed-off-by: Numan Siddique > --- > tests/system-kmod-testsuite.at | 1 + > tests/system-userspace-testsuite.at | 1 + > 2 files changed, 2 insertions(+) > > diff --git a/tests/system-kmod-testsuite.at b/tests/ > system-kmod-testsuite.at > index 6c8478093..2ccd9f1ce 100644 > --- a/tests/system-kmod-testsuite.at > +++ b/tests/system-kmod-testsuite.at > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > m4_include([tests/ovs-macros.at]) > m4_include([tests/ovsdb-macros.at]) > m4_include([tests/ofproto-macros.at]) > +m4_include([tests/ovn-macros.at]) > m4_include([tests/system-common-macros.at]) > m4_include([tests/system-kmod-macros.at]) > > diff --git a/tests/system-userspace-testsuite.at b/tests/ > system-userspace-testsuite.at > index 784eedd2c..4022ae620 100644 > --- a/tests/system-userspace-testsuite.at > +++ b/tests/system-userspace-testsuite.at > @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) > m4_include([tests/ovs-macros.at]) > m4_include([tests/ovsdb-macros.at]) > m4_include([tests/ofproto-macros.at]) > +m4_include([tests/ovn-macros.at]) > m4_include([tests/system-userspace-macros.at]) > m4_include([tests/system-common-macros.at]) > > -- > 2.21.0 > > ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v2] dpif-netdev: log rxq assignment in isolated pmd
Could this patch be reviewed please. Thanks, Gowrishankar On Tue, Oct 22, 2019 at 3:06 PM Gowrishankar Muthukrishnan < gmuth...@redhat.com> wrote: > There is no log about isolated rxq assignment in a pmd today, which > sometimes could be useful to trace rxq/pmd pinning, when debugging > with log. Ovs-appctl dpif-netdev/pmd-rxq-show reports about it > already, but logging is helpful to trace pinning in time. > > Changes: > v2: init numa_id for the info. > > Reported-at: https://bugzilla.redhat.com/1728616 > Signed-off-by: Gowrishankar Muthukrishnan > --- > lib/dpif-netdev.c | 5 + > 1 file changed, 5 insertions(+) > > diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c > index 4546b55e8..4d7c36787 100644 > --- a/lib/dpif-netdev.c > +++ b/lib/dpif-netdev.c > @@ -4573,6 +4573,11 @@ rxq_scheduling(struct dp_netdev *dp, bool pinned) > OVS_REQUIRES(dp->port_mutex) > q->pmd = pmd; > pmd->isolated = true; > dp_netdev_pmd_unref(pmd); > +numa_id = netdev_get_numa_id(q->port->netdev); > +VLOG_INFO("Core %d on numa node %d assigned port > \'%s\' " > + "rx queue %d.", pmd->core_id, numa_id, > + netdev_rxq_get_name(q->rx), > + netdev_rxq_get_queue_id(q->rx)); > } > } else if (!pinned && q->core_id == OVS_CORE_UNSPEC) { > uint64_t cycle_hist = 0; > -- > 2.17.2 > > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH 1/1] ovsdb_execute_mutate: removed unused ovsdb_row pointer variable
Signed-off-by: Damijan Skvarc --- ovsdb/execution.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/ovsdb/execution.c b/ovsdb/execution.c index c55a0b7..f2cf3d7 100644 --- a/ovsdb/execution.c +++ b/ovsdb/execution.c @@ -562,7 +562,6 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser *parser, const struct json *mutations_json; struct ovsdb_condition condition = OVSDB_CONDITION_INITIALIZER(&condition); struct ovsdb_mutation_set mutations = OVSDB_MUTATION_SET_INITIALIZER; -struct ovsdb_row *row = NULL; struct mutate_row_cbdata mr; struct ovsdb_error *error; @@ -595,7 +594,6 @@ ovsdb_execute_mutate(struct ovsdb_execution *x, struct ovsdb_parser *parser, json_object_put(result, "count", json_integer_create(mr.n_matches)); } -ovsdb_row_destroy(row); ovsdb_mutation_set_destroy(&mutations); ovsdb_condition_destroy(&condition); -- 2.7.4 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v6 ovn] Add DNSSL support to OVN
On Tue, Oct 29, 2019 at 5:49 PM Lorenzo Bianconi wrote: > > Introduce the possibility to specify a DNSSL option to Router > Advertisement packets. DNS Search list can be specified using > 'dnssl' tag in the ipv6_ra_configs column of logical router > port table > > Signed-off-by: Lorenzo Bianconi Thanks. I applied this patch to master. Numan > --- > Changes since v5: > - add comment describing how domain names are encoded > > Changes since v4: > - update documentation > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt > - fix strtok_r usage > --- > controller/pinctrl.c | 63 > lib/ovn-l7.h | 11 > northd/ovn-northd.c | 4 +++ > ovn-nb.xml | 5 > tests/ovn.at | 30 ++--- > 5 files changed, 103 insertions(+), 10 deletions(-) > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index f105ebb5c..655ba54a1 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > struct lport_addresses prefixes; > struct in6_addr rdnss; > bool has_rdnss; > +struct ds dnssl; > }; > > struct ipv6_ra_state { > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > { > if (config) { > destroy_lport_addresses(&config->prefixes); > +ds_destroy(&config->dnssl); > free(config); > } > } > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > nd_ra_min_interval_default(config->max_interval)); > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > +ds_init(&config->dnssl); > > const char *address_mode = smap_get(&pb->options, > "ipv6_ra_address_mode"); > if (!address_mode) { > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > } > config->has_rdnss = !!rdnss; > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > +if (dnssl) { > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > +} > + > return config; > > fail: > @@ -2354,6 +2362,57 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t > num, >prev_l4_size + len * > 8)); > } > > +static void > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > +char *dnssl_list) > +{ > +size_t prev_l4_size = dp_packet_l4_size(b); > +size_t size = sizeof(struct ovs_nd_dnssl); > +struct ip6_hdr *nh = dp_packet_l3(b); > +char *t0, *r0, dnssl[255] = {}; > +int i = 0; > + > +/* Multiple DNS Search List must be 'comma' separated > + * (e.g. "a.b.c, d.e.f"). Domain names must be encoded > + * as described in Section 3.1 of RFC1035. > + * (e.g if dns list is a.b.c,www.ovn.org, it will be encoded as: > + * 01 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00 > + */ > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; > + t0 = strtok_r(NULL, ",", &r0)) { > +char *t1, *r1; > + > +size += strlen(t0) + 2; > +if (size > sizeof(dnssl)) { > +return; > +} > + > +for (t1 = strtok_r(t0, ".", &r1); t1; > + t1 = strtok_r(NULL, ".", &r1)) { > +dnssl[i++] = strlen(t1); > +memcpy(&dnssl[i], t1, strlen(t1)); > +i += strlen(t1); > +} > +dnssl[i++] = 0; > +} > +size = ROUND_UP(size, 8); > +nh->ip6_plen = htons(prev_l4_size + size); > + > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof > *nd_dnssl); > +nd_dnssl->type = ND_OPT_DNSSL; > +nd_dnssl->len = size / 8; > +nd_dnssl->reserved = 0; > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); > + > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); > + > +struct ovs_ra_msg *ra = dp_packet_l4(b); > +ra->icmph.icmp6_cksum = 0; > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, > + prev_l4_size + size)); > +} > + > /* Called with in the pinctrl_handler thread context. */ > static long long int > ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) > @@ -2382,6 +2441,10 @@ ipv6_ra_send(struct rconn *swconn, struct > ipv6_ra_state *ra) > packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), > &ra->config->rdnss); > } > +if (ra->config->dnssl.length) { > +packet_put_ra_dnssl_opt(&packet, htonl(0x), > +ra->config->dnssl.string); > +} > > uint64_t ofpacts_stub[4096 / 8]; > struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > index 12cee6329
Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN
On Tue, Oct 29, 2019 at 4:39 PM Lorenzo Bianconi wrote: > > > On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi > > wrote: > > > > > > Introduce the possibility to specify a DNSSL option to Router > > > Advertisement packets. DNS Search list can be specified using > > > 'dnssl' tag in the ipv6_ra_configs column of logical router > > > port table > > > > > > Signed-off-by: Lorenzo Bianconi > > > --- > > > Changes since v4: > > > - update documentation > > > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt > > > - fix strtok_r usage > > > --- > > > controller/pinctrl.c | 59 > > > lib/ovn-l7.h | 11 + > > > northd/ovn-northd.c | 4 +++ > > > ovn-nb.xml | 5 > > > tests/ovn.at | 30 ++ > > > 5 files changed, 99 insertions(+), 10 deletions(-) > > > > > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > > index f105ebb5c..cddf98648 100644 > > > --- a/controller/pinctrl.c > > > +++ b/controller/pinctrl.c > > > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > > > struct lport_addresses prefixes; > > > struct in6_addr rdnss; > > > bool has_rdnss; > > > +struct ds dnssl; > > > }; > > > > > > struct ipv6_ra_state { > > > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > > > { > > > if (config) { > > > destroy_lport_addresses(&config->prefixes); > > > +ds_destroy(&config->dnssl); > > > free(config); > > > } > > > } > > > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct > > > sbrec_port_binding *pb) > > > nd_ra_min_interval_default(config->max_interval)); > > > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", > > > ND_MTU_DEFAULT); > > > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > > > +ds_init(&config->dnssl); > > > > > > const char *address_mode = smap_get(&pb->options, > > > "ipv6_ra_address_mode"); > > > if (!address_mode) { > > > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct > > > sbrec_port_binding *pb) > > > } > > > config->has_rdnss = !!rdnss; > > > > > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > > > +if (dnssl) { > > > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > > > +} > > > + > > > return config; > > > > > > fail: > > > @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, > > > uint8_t num, > > >prev_l4_size + len > > > * 8)); > > > } > > > > > > +static void > > > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > > > +char *dnssl_list) > > > +{ > > > +size_t prev_l4_size = dp_packet_l4_size(b); > > > +size_t size = sizeof(struct ovs_nd_dnssl); > > > +struct ip6_hdr *nh = dp_packet_l3(b); > > > +char *t0, *r0, dnssl[255] = {}; > > > +int i = 0; > > > + > > > +/* Multiple DNS Search List must be 'comma' separated > > > + * (e.g. "a.b.c, d.e.f") */ > > > > Can you please add a comment on how the dns list will be encode with an > > example. > > Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01 > > 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00 > > > > This will really help in understanding what's going on. Also can you > > link the RFC version. > > Hi Numan, > > Thx for the review. Ack, will do in v6 > > > > > > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; > > > + t0 = strtok_r(NULL, ",", &r0)) { > > > +char *t1, *r1; > > > + > > > +size += strlen(t0) + 2; > > > +if (size > sizeof(dnssl)) { > > > > For the sake of simplicity let's assume the length of dnssl is 10. > > Then if the dns list is a.b.c,d.e.f then this will cause the buffer to > > overflow right ? > > > > For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < > > 10. > > In the next outer for loop with t0 = d.e.f, the size will be again 7 > > and it will cause the inner for loop to copy the contents beyond > > the size of dnssl right ? > > > > I think this code needs to be refined a bit to take all these things > > into consideration. > > > > I guess the check is fine since size value will not be reset: > size += strlen(t0) + 2; Ah right. I didn't notice "+=". Thank for pointing this out. > > Regards, > Lorenzo > > > > Thanks > > Numan > > > > > +return; > > > +} > > > + > > > > > > > > > +for (t1 = strtok_r(t0, ".", &r1); t1; > > > + t1 = strtok_r(NULL, ".", &r1)) { > > > +dnssl[i++] = strlen(t1); > > > +memcpy(&dnssl[i], t1, strlen(t1)); > > > +i += strlen(t1); > > > +} > > > +dnssl[i++] = 0; > > > +} > > > +size = ROUND_UP(size, 8); > > > +nh->ip6_plen = htons(prev_l4_size + size); > > > + > > > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uni
[ovs-dev] [PATCH ovn] Fix system-ovn test failures
From: Numan Siddique The commit b740928656a1("testsuite: Use ovn-macros instead of ofproto-macros.") missed updating the system test suite files to include ovn-macros.at. This patch adds it. CC: Han Zhou Signed-off-by: Numan Siddique --- tests/system-kmod-testsuite.at | 1 + tests/system-userspace-testsuite.at | 1 + 2 files changed, 2 insertions(+) diff --git a/tests/system-kmod-testsuite.at b/tests/system-kmod-testsuite.at index 6c8478093..2ccd9f1ce 100644 --- a/tests/system-kmod-testsuite.at +++ b/tests/system-kmod-testsuite.at @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) m4_include([tests/ovs-macros.at]) m4_include([tests/ovsdb-macros.at]) m4_include([tests/ofproto-macros.at]) +m4_include([tests/ovn-macros.at]) m4_include([tests/system-common-macros.at]) m4_include([tests/system-kmod-macros.at]) diff --git a/tests/system-userspace-testsuite.at b/tests/system-userspace-testsuite.at index 784eedd2c..4022ae620 100644 --- a/tests/system-userspace-testsuite.at +++ b/tests/system-userspace-testsuite.at @@ -19,6 +19,7 @@ m4_ifdef([AT_COLOR_TESTS], [AT_COLOR_TESTS]) m4_include([tests/ovs-macros.at]) m4_include([tests/ovsdb-macros.at]) m4_include([tests/ofproto-macros.at]) +m4_include([tests/ovn-macros.at]) m4_include([tests/system-userspace-macros.at]) m4_include([tests/system-common-macros.at]) -- 2.21.0 ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
[ovs-dev] [PATCH v6 ovn] Add DNSSL support to OVN
Introduce the possibility to specify a DNSSL option to Router Advertisement packets. DNS Search list can be specified using 'dnssl' tag in the ipv6_ra_configs column of logical router port table Signed-off-by: Lorenzo Bianconi --- Changes since v5: - add comment describing how domain names are encoded Changes since v4: - update documentation - fix possible array out of bounds issue in packet_put_ra_dnssl_opt - fix strtok_r usage --- controller/pinctrl.c | 63 lib/ovn-l7.h | 11 northd/ovn-northd.c | 4 +++ ovn-nb.xml | 5 tests/ovn.at | 30 ++--- 5 files changed, 103 insertions(+), 10 deletions(-) diff --git a/controller/pinctrl.c b/controller/pinctrl.c index f105ebb5c..655ba54a1 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { struct lport_addresses prefixes; struct in6_addr rdnss; bool has_rdnss; +struct ds dnssl; }; struct ipv6_ra_state { @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) { if (config) { destroy_lport_addresses(&config->prefixes); +ds_destroy(&config->dnssl); free(config); } } @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb) nd_ra_min_interval_default(config->max_interval)); config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; +ds_init(&config->dnssl); const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode"); if (!address_mode) { @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb) } config->has_rdnss = !!rdnss; +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); +if (dnssl) { +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); +} + return config; fail: @@ -2354,6 +2362,57 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num, prev_l4_size + len * 8)); } +static void +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, +char *dnssl_list) +{ +size_t prev_l4_size = dp_packet_l4_size(b); +size_t size = sizeof(struct ovs_nd_dnssl); +struct ip6_hdr *nh = dp_packet_l3(b); +char *t0, *r0, dnssl[255] = {}; +int i = 0; + +/* Multiple DNS Search List must be 'comma' separated + * (e.g. "a.b.c, d.e.f"). Domain names must be encoded + * as described in Section 3.1 of RFC1035. + * (e.g if dns list is a.b.c,www.ovn.org, it will be encoded as: + * 01 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00 + */ +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; + t0 = strtok_r(NULL, ",", &r0)) { +char *t1, *r1; + +size += strlen(t0) + 2; +if (size > sizeof(dnssl)) { +return; +} + +for (t1 = strtok_r(t0, ".", &r1); t1; + t1 = strtok_r(NULL, ".", &r1)) { +dnssl[i++] = strlen(t1); +memcpy(&dnssl[i], t1, strlen(t1)); +i += strlen(t1); +} +dnssl[i++] = 0; +} +size = ROUND_UP(size, 8); +nh->ip6_plen = htons(prev_l4_size + size); + +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof *nd_dnssl); +nd_dnssl->type = ND_OPT_DNSSL; +nd_dnssl->len = size / 8; +nd_dnssl->reserved = 0; +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); + +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); + +struct ovs_ra_msg *ra = dp_packet_l4(b); +ra->icmph.icmp6_cksum = 0; +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, + prev_l4_size + size)); +} + /* Called with in the pinctrl_handler thread context. */ static long long int ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) @@ -2382,6 +2441,10 @@ ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), &ra->config->rdnss); } +if (ra->config->dnssl.length) { +packet_put_ra_dnssl_opt(&packet, htonl(0x), +ra->config->dnssl.string); +} uint64_t ofpacts_stub[4096 / 8]; struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h index 12cee6329..5fc370bf5 100644 --- a/lib/ovn-l7.h +++ b/lib/ovn-l7.h @@ -231,6 +231,17 @@ struct nd_rdnss_opt { }; BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt)); +/* DNSSL option RFC 6106 */ +#define ND_OPT_DNSSL31 +#define ND_DNSSL_OPT_LEN8 +struct ovs_nd_dnssl { +u_int8_t type; /* ND_OPT_DNSSL */ +u_int8_t len; /* >= 2 */ +ovs_be16 re
Re: [ovs-dev] [PATCH] rhel: openvswitch-fedora.spec.in: Fix output redirect to null device
On Mon, Oct 28, 2019 at 10:37:44AM +0200, Roi Dayan wrote: > Add missing slash. > > Fixes: 0447019df7c6 ("fedora-spec: added systemd post/postun/pre/preun > sections") > Signed-off-by: Roi Dayan Thanks Roi, I've applied this to master and backported it as far back as branch-2.5. > --- > rhel/openvswitch-fedora.spec.in | 2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > diff --git a/rhel/openvswitch-fedora.spec.in b/rhel/openvswitch-fedora.spec.in > index 3a87c6d0c339..0f635210147c 100644 > --- a/rhel/openvswitch-fedora.spec.in > +++ b/rhel/openvswitch-fedora.spec.in > @@ -340,7 +340,7 @@ fi > %else > # Package install, not upgrade > if [ $1 -eq 1 ]; then > -/bin/systemctl daemon-reload >dev/null || : > +/bin/systemctl daemon-reload >/dev/null || : > fi > %endif > > -- > 2.8.4 > > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev > ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table
On Tue, Oct 29, 2019 at 3:38 PM Pravin Shelar wrote: > > On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang > wrote: > > > > On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar wrote: > > > > > > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang > > > wrote: > > > > > > > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar wrote: > > > > > > > > ... > > > > ... > > > > struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, > > > > @@ -400,10 +458,9 @@ static struct table_instance > > > > *table_instance_rehash(struct table_instance *ti, > > > > return new_ti; > > > > } > > > > > > > > -int ovs_flow_tbl_flush(struct flow_table *flow_table) > > > > +int ovs_flow_tbl_flush(struct flow_table *table) > > > > { > > > > - struct table_instance *old_ti, *new_ti; > > > > - struct table_instance *old_ufid_ti, *new_ufid_ti; > > > > + struct table_instance *new_ti, *new_ufid_ti; > > > > > > > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS); > > > > if (!new_ti) > > > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table > > > > *flow_table) > > > > if (!new_ufid_ti) > > > > goto err_free_ti; > > > > > > > > - old_ti = ovsl_dereference(flow_table->ti); > > > > - old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); > > > > + table_instance_destroy(table, true); > > > > > > > This would destroy running table causing unnecessary flow miss. Lets > > > keep current scheme of setting up new table before destroying current > > > one. > > > > > > > - rcu_assign_pointer(flow_table->ti, new_ti); > > ... > > /* Must be called with OVS mutex held. */ > > void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > > { > > @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table > > *table, struct sw_flow *flow) > > struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); > > > > BUG_ON(table->count == 0); > > - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); > > - table->count--; > > - if (ovs_identifier_is_ufid(&flow->id)) { > > - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); > > - table->ufid_count--; > > - } > > - > > - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be > > -* accessible as long as the RCU read lock is held. > > -*/ > > - flow_mask_remove(table, flow->mask); > > + table_instance_remove(table, ti, ufid_ti, flow, true); > > } > Lets rename table_instance_remove() to imply it is freeing a flow. hi Pravin, the function ovs_flow_free will free the flow actually. In -ovs_flow_cmd_del ovs_flow_tbl_remove ... ovs_flow_free In -table_instance_destroy table_instance_remove ovs_flow_free But if rename the table_instance_remove, table_instance_flow_free ? > Otherwise looks good. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovs v3 2/2] netdev-dpdk: Add dpdkvdpa port
> -Original Message- > From: William Tu [mailto:u9012...@gmail.com] > Sent: Monday, October 28, 2019 10:46 PM > To: Noa Levy > Cc: ovs-dev@openvswitch.org; Oz Shlomo ; Majd > Dibbiny ; Ameer Mahagneh > ; Eli Britstein > Subject: Re: [ovs-dev] [PATCH ovs v3 2/2] netdev-dpdk: Add dpdkvdpa port > > Hi Noa, > > Thanks for your reply. > > > > > > Hi Noa, > > > > > > > > > > I have a couple more questions. I'm still at the learning stage > > > > > of this new feature, thanks in advance for your patience. > > > > > > > > > > On Thu, Oct 17, 2019 at 02:16:56PM +0300, Noa Ezra wrote: > > > > > > dpdkvdpa netdev works with 3 components: > > > > > > vhost-user socket, vdpa device: real vdpa device or a VF and > > > > > > representor of "vdpa device". > > > > > > > > > > What NIC card support this feature? > > > > > I don't have real vdpa device, can I use Intel X540 VF feature? > > > > > > > > > > > > > This feature will have two modes, SW and HW. > > > > The SW mode doesn't depend on a real vdpa device and allows you to > > > > use this feature even if you don't have a NIC that support it. > > Although you need to use representors, so you need your NIC to support > it. > > > > The HW mode will be implemented in the future and will use a real > > > > vdpa device. It will be better to use the HW mode if you have a > > > > NIC that support > > > it. > > > > > > > > For now, we only support the SW mode, when vdpa will have support > > > > in dpdk, we will add the HW mode to OVS. > > > > > > > > > > > > > > > > In order to add a new vDPA port, add a new port to existing > > > > > > bridge with type dpdkvdpa and vDPA options: > > > > > > ovs-vsctl add-port br0 vdpa0 -- set Interface vdpa0 type=dpdkvdpa > > > > > >options:vdpa-socket-path= > > > > > >options:vdpa-accelerator-devargs= > > > > > >options:dpdk-devargs=,representor=[id] > > > > > > > > > > > > On this command OVS will create a new netdev: > > > > > > 1. Register vhost-user-client device. > > > > > > 2. Open and configure VF dpdk port. > > > > > > 3. Open and configure representor dpdk port. > > > > > > > > > > > > The new netdev will use netdev_rxq_recv() function in order to > > > > > > receive packets from VF and push to vhost-user and receive > > > > > > packets from vhost-user and push to VF. > > > > > > > > > > > > Signed-off-by: Noa Ezra > > > > > > Reviewed-by: Oz Shlomo > > > > > > --- > > > > > > Documentation/automake.mk | 1 + > > > > > > Documentation/topics/dpdk/index.rst | 1 + > > > > > > Documentation/topics/dpdk/vdpa.rst | 90 > > > > > > > NEWS| 1 + > > > > > > lib/netdev-dpdk.c | 162 > > > > > > > > > > > vswitchd/vswitch.xml| 25 ++ > > > > > > 6 files changed, 280 insertions(+) create mode 100644 > > > > > > Documentation/topics/dpdk/vdpa.rst > > > > > > > > > > > > diff --git a/Documentation/automake.mk > > > > b/Documentation/automake.mk > > > > > > index cd68f3b..ee574bc 100644 > > > > > > --- a/Documentation/automake.mk > > > > > > +++ b/Documentation/automake.mk > > > > > > @@ -43,6 +43,7 @@ DOC_SOURCE = \ > > > > > > Documentation/topics/dpdk/ring.rst \ > > > > > > Documentation/topics/dpdk/vdev.rst \ > > > > > > Documentation/topics/dpdk/vhost-user.rst \ > > > > > > + Documentation/topics/dpdk/vdpa.rst \ > > > > > > Documentation/topics/fuzzing/index.rst \ > > > > > > Documentation/topics/fuzzing/what-is-fuzzing.rst \ > > > > > > > > > > > > Documentation/topics/fuzzing/ovs-fuzzing-infrastructure.rst \ > > > > > > diff --git a/Documentation/topics/dpdk/index.rst > > > > > > b/Documentation/topics/dpdk/index.rst > > > > > > index cf24a7b..c1d4ea7 100644 > > > > > > --- a/Documentation/topics/dpdk/index.rst > > > > > > +++ b/Documentation/topics/dpdk/index.rst > > > > > > @@ -41,3 +41,4 @@ The DPDK Datapath > > > > > > /topics/dpdk/pdump > > > > > > /topics/dpdk/jumbo-frames > > > > > > /topics/dpdk/memory > > > > > > + /topics/dpdk/vdpa > > > > > > diff --git a/Documentation/topics/dpdk/vdpa.rst > > > > > > b/Documentation/topics/dpdk/vdpa.rst > > > > > > new file mode 100644 > > > > > > > > > +2357,23 @@ netdev_dpdk_rxq_recv(struct netdev_rxq *rxq, > > > > > > +struct > > > > > dp_packet_batch *batch, > > > > > > return 0; > > > > > > } > > > > > > > > > > > > +static int > > > > > > +netdev_dpdk_vdpa_rxq_recv(struct netdev_rxq *rxq, > > > > > > + struct dp_packet_batch *batch, > > > > > > + int *qfill) { > > > > > > +struct netdev_dpdk *dev = netdev_dpdk_cast(rxq->netdev); > > > > > > +int fwd_rx; > > > > > > +int ret; > > > > > > + > > > > > > +fwd_rx = netdev_dpdk_vdpa_rxq_recv_impl(dev->relay, > > > > > > + rxq->queue_id); > > > > > I'm still not clear about the above function. > > > > > So netdev_dpdk_vdpa_recv_impl()
Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN
> On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi > wrote: > > > > Introduce the possibility to specify a DNSSL option to Router > > Advertisement packets. DNS Search list can be specified using > > 'dnssl' tag in the ipv6_ra_configs column of logical router > > port table > > > > Signed-off-by: Lorenzo Bianconi > > --- > > Changes since v4: > > - update documentation > > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt > > - fix strtok_r usage > > --- > > controller/pinctrl.c | 59 > > lib/ovn-l7.h | 11 + > > northd/ovn-northd.c | 4 +++ > > ovn-nb.xml | 5 > > tests/ovn.at | 30 ++ > > 5 files changed, 99 insertions(+), 10 deletions(-) > > > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > index f105ebb5c..cddf98648 100644 > > --- a/controller/pinctrl.c > > +++ b/controller/pinctrl.c > > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > > struct lport_addresses prefixes; > > struct in6_addr rdnss; > > bool has_rdnss; > > +struct ds dnssl; > > }; > > > > struct ipv6_ra_state { > > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > > { > > if (config) { > > destroy_lport_addresses(&config->prefixes); > > +ds_destroy(&config->dnssl); > > free(config); > > } > > } > > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding > > *pb) > > nd_ra_min_interval_default(config->max_interval)); > > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", > > ND_MTU_DEFAULT); > > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > > +ds_init(&config->dnssl); > > > > const char *address_mode = smap_get(&pb->options, > > "ipv6_ra_address_mode"); > > if (!address_mode) { > > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct > > sbrec_port_binding *pb) > > } > > config->has_rdnss = !!rdnss; > > > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > > +if (dnssl) { > > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > > +} > > + > > return config; > > > > fail: > > @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t > > num, > >prev_l4_size + len * > > 8)); > > } > > > > +static void > > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > > +char *dnssl_list) > > +{ > > +size_t prev_l4_size = dp_packet_l4_size(b); > > +size_t size = sizeof(struct ovs_nd_dnssl); > > +struct ip6_hdr *nh = dp_packet_l3(b); > > +char *t0, *r0, dnssl[255] = {}; > > +int i = 0; > > + > > +/* Multiple DNS Search List must be 'comma' separated > > + * (e.g. "a.b.c, d.e.f") */ > > Can you please add a comment on how the dns list will be encode with an > example. > Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01 > 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00 > > This will really help in understanding what's going on. Also can you > link the RFC version. Hi Numan, Thx for the review. Ack, will do in v6 > > > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; > > + t0 = strtok_r(NULL, ",", &r0)) { > > +char *t1, *r1; > > + > > +size += strlen(t0) + 2; > > +if (size > sizeof(dnssl)) { > > For the sake of simplicity let's assume the length of dnssl is 10. > Then if the dns list is a.b.c,d.e.f then this will cause the buffer to > overflow right ? > > For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < 10. > In the next outer for loop with t0 = d.e.f, the size will be again 7 > and it will cause the inner for loop to copy the contents beyond > the size of dnssl right ? > > I think this code needs to be refined a bit to take all these things > into consideration. > I guess the check is fine since size value will not be reset: size += strlen(t0) + 2; Regards, Lorenzo > Thanks > Numan > > > +return; > > +} > > + > > > > > +for (t1 = strtok_r(t0, ".", &r1); t1; > > + t1 = strtok_r(NULL, ".", &r1)) { > > +dnssl[i++] = strlen(t1); > > +memcpy(&dnssl[i], t1, strlen(t1)); > > +i += strlen(t1); > > +} > > +dnssl[i++] = 0; > > +} > > +size = ROUND_UP(size, 8); > > +nh->ip6_plen = htons(prev_l4_size + size); > > + > > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof > > *nd_dnssl); > > +nd_dnssl->type = ND_OPT_DNSSL; > > +nd_dnssl->len = size / 8; > > +nd_dnssl->reserved = 0; > > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); > > + > > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); > > + > > +struct ovs_ra_msg *ra = dp_packet_l4(b); > > +ra->icmph.icmp6_cksum = 0; > > +uint32_t icmp_csum = packet_csum_pseu
Re: [ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN
On Tue, Oct 29, 2019 at 3:29 PM Lorenzo Bianconi wrote: > > Introduce the possibility to specify a DNSSL option to Router > Advertisement packets. DNS Search list can be specified using > 'dnssl' tag in the ipv6_ra_configs column of logical router > port table > > Signed-off-by: Lorenzo Bianconi > --- > Changes since v4: > - update documentation > - fix possible array out of bounds issue in packet_put_ra_dnssl_opt > - fix strtok_r usage > --- > controller/pinctrl.c | 59 > lib/ovn-l7.h | 11 + > northd/ovn-northd.c | 4 +++ > ovn-nb.xml | 5 > tests/ovn.at | 30 ++ > 5 files changed, 99 insertions(+), 10 deletions(-) > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index f105ebb5c..cddf98648 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > struct lport_addresses prefixes; > struct in6_addr rdnss; > bool has_rdnss; > +struct ds dnssl; > }; > > struct ipv6_ra_state { > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > { > if (config) { > destroy_lport_addresses(&config->prefixes); > +ds_destroy(&config->dnssl); > free(config); > } > } > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > nd_ra_min_interval_default(config->max_interval)); > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > +ds_init(&config->dnssl); > > const char *address_mode = smap_get(&pb->options, > "ipv6_ra_address_mode"); > if (!address_mode) { > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > } > config->has_rdnss = !!rdnss; > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > +if (dnssl) { > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > +} > + > return config; > > fail: > @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t > num, >prev_l4_size + len * > 8)); > } > > +static void > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > +char *dnssl_list) > +{ > +size_t prev_l4_size = dp_packet_l4_size(b); > +size_t size = sizeof(struct ovs_nd_dnssl); > +struct ip6_hdr *nh = dp_packet_l3(b); > +char *t0, *r0, dnssl[255] = {}; > +int i = 0; > + > +/* Multiple DNS Search List must be 'comma' separated > + * (e.g. "a.b.c, d.e.f") */ Can you please add a comment on how the dns list will be encode with an example. Like if dns list is a.b.c,www.ovn.org then it will be encoded as - 01 61 01 62 01 63 00 03 77 77 77 03 6f 76 63 03 6f 72 67 00 This will really help in understanding what's going on. Also can you link the RFC version. > +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; > + t0 = strtok_r(NULL, ",", &r0)) { > +char *t1, *r1; > + > +size += strlen(t0) + 2; > +if (size > sizeof(dnssl)) { For the sake of simplicity let's assume the length of dnssl is 10. Then if the dns list is a.b.c,d.e.f then this will cause the buffer to overflow right ? For the first outer for loop with t0 = a.b.c, the 'size' will be 7. So 7 < 10. In the next outer for loop with t0 = d.e.f, the size will be again 7 and it will cause the inner for loop to copy the contents beyond the size of dnssl right ? I think this code needs to be refined a bit to take all these things into consideration. Thanks Numan > +return; > +} > + > +for (t1 = strtok_r(t0, ".", &r1); t1; > + t1 = strtok_r(NULL, ".", &r1)) { > +dnssl[i++] = strlen(t1); > +memcpy(&dnssl[i], t1, strlen(t1)); > +i += strlen(t1); > +} > +dnssl[i++] = 0; > +} > +size = ROUND_UP(size, 8); > +nh->ip6_plen = htons(prev_l4_size + size); > + > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof > *nd_dnssl); > +nd_dnssl->type = ND_OPT_DNSSL; > +nd_dnssl->len = size / 8; > +nd_dnssl->reserved = 0; > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); > + > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); > + > +struct ovs_ra_msg *ra = dp_packet_l4(b); > +ra->icmph.icmp6_cksum = 0; > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, > + prev_l4_size + size)); > +} > + > /* Called with in the pinctrl_handler thread context. */ > static long long int > ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) > @@ -2382,6 +2437,10 @@ ipv6_ra_send(struct rconn *swconn, struct > ipv6_ra_state *ra) > packet_put_ra_
[ovs-dev] [PATCH v5 ovn] Add DNSSL support to OVN
Introduce the possibility to specify a DNSSL option to Router Advertisement packets. DNS Search list can be specified using 'dnssl' tag in the ipv6_ra_configs column of logical router port table Signed-off-by: Lorenzo Bianconi --- Changes since v4: - update documentation - fix possible array out of bounds issue in packet_put_ra_dnssl_opt - fix strtok_r usage --- controller/pinctrl.c | 59 lib/ovn-l7.h | 11 + northd/ovn-northd.c | 4 +++ ovn-nb.xml | 5 tests/ovn.at | 30 ++ 5 files changed, 99 insertions(+), 10 deletions(-) diff --git a/controller/pinctrl.c b/controller/pinctrl.c index f105ebb5c..cddf98648 100644 --- a/controller/pinctrl.c +++ b/controller/pinctrl.c @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { struct lport_addresses prefixes; struct in6_addr rdnss; bool has_rdnss; +struct ds dnssl; }; struct ipv6_ra_state { @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) { if (config) { destroy_lport_addresses(&config->prefixes); +ds_destroy(&config->dnssl); free(config); } } @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb) nd_ra_min_interval_default(config->max_interval)); config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; +ds_init(&config->dnssl); const char *address_mode = smap_get(&pb->options, "ipv6_ra_address_mode"); if (!address_mode) { @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding *pb) } config->has_rdnss = !!rdnss; +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); +if (dnssl) { +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); +} + return config; fail: @@ -2354,6 +2362,53 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num, prev_l4_size + len * 8)); } +static void +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, +char *dnssl_list) +{ +size_t prev_l4_size = dp_packet_l4_size(b); +size_t size = sizeof(struct ovs_nd_dnssl); +struct ip6_hdr *nh = dp_packet_l3(b); +char *t0, *r0, dnssl[255] = {}; +int i = 0; + +/* Multiple DNS Search List must be 'comma' separated + * (e.g. "a.b.c, d.e.f") */ +for (t0 = strtok_r(dnssl_list, ",", &r0); t0; + t0 = strtok_r(NULL, ",", &r0)) { +char *t1, *r1; + +size += strlen(t0) + 2; +if (size > sizeof(dnssl)) { +return; +} + +for (t1 = strtok_r(t0, ".", &r1); t1; + t1 = strtok_r(NULL, ".", &r1)) { +dnssl[i++] = strlen(t1); +memcpy(&dnssl[i], t1, strlen(t1)); +i += strlen(t1); +} +dnssl[i++] = 0; +} +size = ROUND_UP(size, 8); +nh->ip6_plen = htons(prev_l4_size + size); + +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof *nd_dnssl); +nd_dnssl->type = ND_OPT_DNSSL; +nd_dnssl->len = size / 8; +nd_dnssl->reserved = 0; +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); + +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); + +struct ovs_ra_msg *ra = dp_packet_l4(b); +ra->icmph.icmp6_cksum = 0; +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, + prev_l4_size + size)); +} + /* Called with in the pinctrl_handler thread context. */ static long long int ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) @@ -2382,6 +2437,10 @@ ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), &ra->config->rdnss); } +if (ra->config->dnssl.length) { +packet_put_ra_dnssl_opt(&packet, htonl(0x), +ra->config->dnssl.string); +} uint64_t ofpacts_stub[4096 / 8]; struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h index 12cee6329..5fc370bf5 100644 --- a/lib/ovn-l7.h +++ b/lib/ovn-l7.h @@ -231,6 +231,17 @@ struct nd_rdnss_opt { }; BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt)); +/* DNSSL option RFC 6106 */ +#define ND_OPT_DNSSL31 +#define ND_DNSSL_OPT_LEN8 +struct ovs_nd_dnssl { +u_int8_t type; /* ND_OPT_DNSSL */ +u_int8_t len; /* >= 2 */ +ovs_be16 reserved; +ovs_16aligned_be32 lifetime; +}; +BUILD_ASSERT_DECL(ND_DNSSL_OPT_LEN == sizeof(struct ovs_nd_dnssl)); + #define DHCPV6_DUID_LL 3 #define DHCPV6_HW_TYPE_ETH 1 diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c index 33fe9363c..194e4bf4a 100644 --- a/northd/ovn-northd.
Re: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev
> -Original Message- > From: Roni Bar Yanai > Sent: Monday, October 28, 2019 10:25 PM > To: Ilya Maximets ; Noa Levy ; > ovs-dev@openvswitch.org > Cc: Oz Shlomo ; Majd Dibbiny > ; Ameer Mahagneh ; Eli > Britstein ; William Tu ; Simon > Horman > Subject: RE: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev > > Hi ilya, > please see inline > > >-Original Message- > >From: Ilya Maximets > >Sent: Monday, October 28, 2019 3:46 PM > >To: Noa Levy ; ovs-dev@openvswitch.org; Roni Bar > >Yanai > >Cc: Oz Shlomo ; Majd Dibbiny > ; > >Ameer Mahagneh ; Eli Britstein > >; William Tu ; Simon Horman > > > >Subject: Re: [ovs-dev] [PATCH ovs v3 0/2] Introduce dpdkvdpa netdev > > > >On 17.10.2019 13:16, Noa Ezra wrote: > >> There are two approaches to communicate with a guest, using virtIO or > >> SR-IOV. > >> SR-IOV allows working with port representor which is attached to the > >> OVS and a matching VF is given with pass-through to the VM. > >> HW rules can process packets from up-link and direct them to the VF > >> without going through SW (OVS) and therefore SR-IOV gives the best > >> performance. > >> However, SR-IOV architecture requires that the guest will use a > >> driver which is specific to the underlying HW. Specific HW driver has > >> two main > >> drawbacks: > >> 1. Breaks virtualization in some sense (VM aware of the HW), can also > >> limit the type of images supported. > >> 2. Less natural support for live migration. > >> > >> Using virtIO interface solves both problems, but reduces performance > >> and causes losing of some functionality, for example, for some HW > >> offload, working directly with virtIO cannot be supported. > >> In order to solve this conflict, we created a new netdev type-dpdkvdpa. > >> The new netdev is basically similar to a regular dpdk netdev, but it > >> has some additional functionality for transferring packets from > >> virtIO guest (VM) to a VF and vice versa. With this solution we can > >> benefit both SR-IOV and virtIO. > >> vDPA netdev is designed to support both SW and HW use-cases. > >> HW mode will be used to configure vDPA capable devices. The support > >> for this mode is on progress in the dpdk community. > >> SW acceleration is used to leverage SR-IOV offloads to virtIO guests > >> by relaying packets between VF and virtio devices and as a pre-step > >> for supporting vDPA in HW mode. > >> > >> Running example: > >> 1. Configure OVS bridge and ports: > >> ovs-vsctl add-br br0-ovs -- set bridge br0-ovs datapath_type=netdev > >> ovs-vsctl add-port br0-ovs pf -- set Interface pf type=dpdk options: \ > >> dpdk-devargs= > >> ovs-vsctl add-port br0 vdpa0 -- set Interface vdpa0 type=dpdkvdpa \ > >> options:vdpa-socket-path= \ > >> options:vdpa-accelerator-devargs= \ > >> options:dpdk-devargs=,representor=[id] 2. Run a > >> virtIO guest (VM) in server mode that creates the socket of > >> the vDPA port. > >> 3. Send traffic. > >> > >> Noa Ezra (2): > >>netdev-dpdk-vdpa: Introduce dpdkvdpa netdev > >>netdev-dpdk: Add dpdkvdpa port > >> > >> Documentation/automake.mk | 1 + > >> Documentation/topics/dpdk/index.rst | 1 + > >> Documentation/topics/dpdk/vdpa.rst | 90 + > >> NEWS| 1 + > >> lib/automake.mk | 4 +- > >> lib/netdev-dpdk-vdpa.c | 750 > > > >> lib/netdev-dpdk-vdpa.h | 54 +++ > >> lib/netdev-dpdk.c | 162 > >> vswitchd/vswitch.xml| 25 ++ > >> 9 files changed, 1087 insertions(+), 1 deletion(-) > >> create mode 100644 Documentation/topics/dpdk/vdpa.rst > >> create mode 100755 lib/netdev-dpdk-vdpa.c > >> create mode 100644 lib/netdev-dpdk-vdpa.h > >> > > > >Hi, everyone. > > > >So, I have a few questions (mostly to Roni?): > > > >1. What happened to idea of implementing this as a DPDK vdev? > We wanted to solve both OVS-kernel and OVS-DPDK issue. > The main argument against it was that we allow to define ports that are not > OF ports on the switch. regardless of how useful we think this feature can be > it breaks something fundamental, and looking back I agree. > The vdev, was a workaround, but after speaking with some of our DPDK > experts, they had similar arguments, this time for DPDK world. You create a > port that never gets packet and never sends packets. This totally doesn't > make sense even if it can be useful in some cases. When we remove the > kernel form the equation, we are left with vDPA. > In fact we followed your suggestion of having SW vDPA and HW vDPA. > This makes total sense, for example open stack can configure vDPA and let > the OVS probe the device and go with HW or SW, leaving same functionality > and same configuration. > > > > >2. What was the results of "direct output optimization" patch [1] testing? > >At this point I also have to mention that OVS is going to ha
Re: [ovs-dev] [PATCH] lacp: warn transmit failure of lacp pdu
Hi Ben, Idea is to log lacp specific tx failure. A one step further as in ofproto_dpif_send_packet has to make the debug info more generic, even though it would cater all of its calling functions. For example, bundle_send_learning_packets reports such tx failures for gratuitous packets. I think, we need to handle such warning at the calling place itself to debug quickly. Thanks, Gowrishankar On Fri, Oct 25, 2019 at 2:47 AM Ben Pfaff wrote: > On Mon, Oct 21, 2019 at 07:34:36PM +0530, Gowrishankar Muthukrishnan wrote: > > It might be difficult to trace whether LACP PDU tx (as in > > response) was successful when the pdu was not transmitted by > > egress slave for various reasons (including resource contention > > within NIC) and only way to trace its fate is by looking at > > ofproto->stats.tx_[packets/bytes] and slave port stats. > > > > Adding a warning when there is tx failure could help user > > debug at the root of this problem. > > > > Signed-off-by: Gowrishankar Muthukrishnan > > This logic seems like it's probably true for all users of > ofproto_dpif_send_packet(), not just LACP PDU transmission. Would it be > a good idea to simply make ofproto_dpif_send_packet() log a warning when > tx fails? > ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH ovn] ovndb-servers.ocf: Change from 'openvswitch' to 'ovn' in default vars.
On Tue, Oct 29, 2019 at 1:31 PM Numan Siddique wrote: > > On Mon, Oct 28, 2019 at 10:57 PM Ben Pfaff wrote: > > > > On Mon, Oct 28, 2019 at 10:25:34AM -0700, Ben Pfaff wrote: > > > On Mon, Oct 28, 2019 at 08:04:15PM +0530, num...@ovn.org wrote: > > > > From: Numan Siddique > > > > > > > > CC: Aliasgar Ginwala > > > > Signed-off-by: Numan Siddique > > > > > > Should we introduce something into the distro packaging, at the same > > > time, to migrate from the old to the new locations automatically? > > > > Or, alternatively, should the script tolerate finding these in the old > > location? > > > > Should we add something to the ovn upgrade guide talking about the > > migration? > > I think we will have the same issue even for the ovn NB and SB dbs. > The default path of OVN NB and > SB dbs is - /etc/openvswitch before the split and after the split it > is /etc/ovn. > > I agree that we should add upgrade guide taking about the migration. I > will work on it. > > In this particular case, the default values are changed. And these > values can always be overridden > when starting or updating the pacemaker service like > - pcs resource create ovn-dbs ... ovn_nb_db_privkey= ... > > In the case of openstack tripleo, where OVN services are run as > containers, we have to mount > the host directories properly. If the host path - > /var/lib/openvswitch/ovn is mounted as /etc/openvswitch > inside the container, we need to also mount the same host path to > /etc/ovn so that the upgrade to > new OVN post split will be seamless. > > I thought that this can be handled in distro install scripts like > creating a symbolic link, but this > doesn't address the deployments which run ovn services inside containers. > > I think we can also enhance the ovn-ctl script to tolerate old > locations. I will explore more on this. > > I will rework this patch to look into both the locations if the user > has not set these variables when > starting the service from pacemaker. I went ahead and applied the below changes as it makes sense. Otherwise pacemaker ovn resource will fail since it can't find ovn-ctl script in the old path. diff --git a/utilities/ovndb-servers.ocf b/utilities/ovndb-servers.ocf index cd4742668..4678a1dc9 100755 --- a/utilities/ovndb-servers.ocf +++ b/utilities/ovndb-servers.ocf @@ -2,7 +2,7 @@ : ${OCF_FUNCTIONS_DIR=${OCF_ROOT}/lib/heartbeat} . ${OCF_FUNCTIONS_DIR}/ocf-shellfuncs -: ${OVN_CTL_DEFAULT="/usr/share/openvswitch/scripts/ovn-ctl"} +: ${OVN_CTL_DEFAULT="/usr/share/ovn/scripts/ovn-ctl"} : ${NB_MASTER_PORT_DEFAULT="6641"} : ${NB_MASTER_PROTO_DEFAULT="tcp"} : ${SB_MASTER_PORT_DEFAULT="6642"} Thanks Numan > > Thanks > Numan > > > > ___ > > dev mailing list > > d...@openvswitch.org > > https://mail.openvswitch.org/mailman/listinfo/ovs-dev ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4 ovn 2/2] Add DNSSL support to OVN
> On Mon, Oct 28, 2019 at 8:10 PM Lorenzo Bianconi > wrote: > > > > Introduce the possibility to specify a DNSSL option to Router > > Advertisement packets. DNS Search list can be specified using > > 'dnssl' tag in the ipv6_ra_configs column of logical router > > port table > > > > Signed-off-by: Lorenzo Bianconi > > --- > > controller/pinctrl.c | 51 > > lib/ovn-l7.h | 11 ++ > > northd/ovn-northd.c | 4 > > ovn-nb.xml | 5 + > > tests/ovn.at | 30 +- > > 5 files changed, 91 insertions(+), 10 deletions(-) > > > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > > index f105ebb5c..0bd1ef3ef 100644 > > --- a/controller/pinctrl.c > > +++ b/controller/pinctrl.c > > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > > struct lport_addresses prefixes; > > struct in6_addr rdnss; > > bool has_rdnss; > > +struct ds dnssl; > > }; > > > > struct ipv6_ra_state { > > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > > { > > if (config) { > > destroy_lport_addresses(&config->prefixes); > > +ds_destroy(&config->dnssl); > > free(config); > > } > > } > > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding > > *pb) > > nd_ra_min_interval_default(config->max_interval)); > > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", > > ND_MTU_DEFAULT); > > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > > +ds_init(&config->dnssl); > > > > const char *address_mode = smap_get(&pb->options, > > "ipv6_ra_address_mode"); > > if (!address_mode) { > > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct > > sbrec_port_binding *pb) > > } > > config->has_rdnss = !!rdnss; > > > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > > +if (dnssl) { > > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > > +} > > + > > return config; > > > > fail: > > @@ -2354,6 +2362,45 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t > > num, > >prev_l4_size + len * > > 8)); > > } > > > > +static void > > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > > +char *dnssl_list) > > +{ > > +char *t0, *r0 = dnssl_list, dnssl[255] = {}; > > +size_t prev_l4_size = dp_packet_l4_size(b); > > +size_t size = sizeof(struct ovs_nd_dnssl); > > +struct ip6_hdr *nh = dp_packet_l3(b); > > +int i = 0; > > + > > +while ((t0 = strtok_r(r0, ",", &r0))) { Hi Numan, thx for the review. > > Are we not suppose to pass NULL as the first argument to subsequent > calls of strtok_r ? > right, will fix in v5 > Is there a chance that we might cross dnssl 255 length and have a segfault ? > right, will fix in v5 > Can you please add comments on how the dnssl string is to be encoded ? > ack, will add in v5 > Since the code here, is already handling for multiple dnssl values in > the "dnssl_list", we can just accept > multiple values of dnssl right ? Right now the patch is restricting to > just 1 dnssl for the CMS to configure. right. Regards, Lorenzo > > Thanks > Numan > > > > +char *t1, *r1 = t0; > > + > > +size += strlen(t0) + 2; > > +while ((t1 = strtok_r(r1, ".", &r1))) { > > +dnssl[i++] = strlen(t1); > > +memcpy(&dnssl[i], t1, strlen(t1)); > > +i += strlen(t1); > > +} > > +dnssl[i++] = 0; > > +} > > +size = ROUND_UP(size, 8); > > +nh->ip6_plen = htons(prev_l4_size + size); > > + > > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof > > *nd_dnssl); > > +nd_dnssl->type = ND_OPT_DNSSL; > > +nd_dnssl->len = size / 8; > > +nd_dnssl->reserved = 0; > > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); > > + > > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); > > + > > +struct ovs_ra_msg *ra = dp_packet_l4(b); > > +ra->icmph.icmp6_cksum = 0; > > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); > > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, > > + prev_l4_size + > > size)); > > +} > > + > > /* Called with in the pinctrl_handler thread context. */ > > static long long int > > ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) > > @@ -2382,6 +2429,10 @@ ipv6_ra_send(struct rconn *swconn, struct > > ipv6_ra_state *ra) > > packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), > > &ra->config->rdnss); > > } > > +if (ra->config->dnssl.length) { > > +packet_put_ra_dnssl_opt(&packet, htonl(0x), > > +ra->config->dnssl.string); > > +} > > > > uint64_t ofpacts_stub[40
Re: [ovs-dev] [PATCH ovn] ovndb-servers.ocf: Change from 'openvswitch' to 'ovn' in default vars.
On Mon, Oct 28, 2019 at 10:57 PM Ben Pfaff wrote: > > On Mon, Oct 28, 2019 at 10:25:34AM -0700, Ben Pfaff wrote: > > On Mon, Oct 28, 2019 at 08:04:15PM +0530, num...@ovn.org wrote: > > > From: Numan Siddique > > > > > > CC: Aliasgar Ginwala > > > Signed-off-by: Numan Siddique > > > > Should we introduce something into the distro packaging, at the same > > time, to migrate from the old to the new locations automatically? > > Or, alternatively, should the script tolerate finding these in the old > location? > > Should we add something to the ovn upgrade guide talking about the > migration? I think we will have the same issue even for the ovn NB and SB dbs. The default path of OVN NB and SB dbs is - /etc/openvswitch before the split and after the split it is /etc/ovn. I agree that we should add upgrade guide taking about the migration. I will work on it. In this particular case, the default values are changed. And these values can always be overridden when starting or updating the pacemaker service like - pcs resource create ovn-dbs ... ovn_nb_db_privkey= ... In the case of openstack tripleo, where OVN services are run as containers, we have to mount the host directories properly. If the host path - /var/lib/openvswitch/ovn is mounted as /etc/openvswitch inside the container, we need to also mount the same host path to /etc/ovn so that the upgrade to new OVN post split will be seamless. I thought that this can be handled in distro install scripts like creating a symbolic link, but this doesn't address the deployments which run ovn services inside containers. I think we can also enhance the ovn-ctl script to tolerate old locations. I will explore more on this. I will rework this patch to look into both the locations if the user has not set these variables when starting the service from pacemaker. Thanks Numan > ___ > dev mailing list > d...@openvswitch.org > https://mail.openvswitch.org/mailman/listinfo/ovs-dev ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH net-next v4 08/10] net: openvswitch: fix possible memleak on destroy flow-table
On Sun, Oct 27, 2019 at 11:49 PM Tonghao Zhang wrote: > > On Thu, Oct 24, 2019 at 3:14 PM Pravin Shelar wrote: > > > > On Tue, Oct 22, 2019 at 7:35 PM Tonghao Zhang > > wrote: > > > > > > On Tue, Oct 22, 2019 at 2:58 PM Pravin Shelar wrote: > > > > > > ... > > ... > > > struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, > > > @@ -400,10 +458,9 @@ static struct table_instance > > > *table_instance_rehash(struct table_instance *ti, > > > return new_ti; > > > } > > > > > > -int ovs_flow_tbl_flush(struct flow_table *flow_table) > > > +int ovs_flow_tbl_flush(struct flow_table *table) > > > { > > > - struct table_instance *old_ti, *new_ti; > > > - struct table_instance *old_ufid_ti, *new_ufid_ti; > > > + struct table_instance *new_ti, *new_ufid_ti; > > > > > > new_ti = table_instance_alloc(TBL_MIN_BUCKETS); > > > if (!new_ti) > > > @@ -412,16 +469,12 @@ int ovs_flow_tbl_flush(struct flow_table > > > *flow_table) > > > if (!new_ufid_ti) > > > goto err_free_ti; > > > > > > - old_ti = ovsl_dereference(flow_table->ti); > > > - old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); > > > + table_instance_destroy(table, true); > > > > > This would destroy running table causing unnecessary flow miss. Lets > > keep current scheme of setting up new table before destroying current > > one. > > > > > - rcu_assign_pointer(flow_table->ti, new_ti); ... > /* Must be called with OVS mutex held. */ > void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) > { > @@ -752,17 +794,7 @@ void ovs_flow_tbl_remove(struct flow_table > *table, struct sw_flow *flow) > struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); > > BUG_ON(table->count == 0); > - hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); > - table->count--; > - if (ovs_identifier_is_ufid(&flow->id)) { > - hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); > - table->ufid_count--; > - } > - > - /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be > -* accessible as long as the RCU read lock is held. > -*/ > - flow_mask_remove(table, flow->mask); > + table_instance_remove(table, ti, ufid_ti, flow, true); > } Lets rename table_instance_remove() to imply it is freeing a flow. Otherwise looks good. ___ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev
Re: [ovs-dev] [PATCH v4 ovn 2/2] Add DNSSL support to OVN
On Mon, Oct 28, 2019 at 8:10 PM Lorenzo Bianconi wrote: > > Introduce the possibility to specify a DNSSL option to Router > Advertisement packets. DNS Search list can be specified using > 'dnssl' tag in the ipv6_ra_configs column of logical router > port table > > Signed-off-by: Lorenzo Bianconi > --- > controller/pinctrl.c | 51 > lib/ovn-l7.h | 11 ++ > northd/ovn-northd.c | 4 > ovn-nb.xml | 5 + > tests/ovn.at | 30 +- > 5 files changed, 91 insertions(+), 10 deletions(-) > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index f105ebb5c..0bd1ef3ef 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -2194,6 +2194,7 @@ struct ipv6_ra_config { > struct lport_addresses prefixes; > struct in6_addr rdnss; > bool has_rdnss; > +struct ds dnssl; > }; > > struct ipv6_ra_state { > @@ -2215,6 +2216,7 @@ ipv6_ra_config_delete(struct ipv6_ra_config *config) > { > if (config) { > destroy_lport_addresses(&config->prefixes); > +ds_destroy(&config->dnssl); > free(config); > } > } > @@ -2253,6 +2255,7 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > nd_ra_min_interval_default(config->max_interval)); > config->mtu = smap_get_int(&pb->options, "ipv6_ra_mtu", ND_MTU_DEFAULT); > config->la_flags = IPV6_ND_RA_OPT_PREFIX_ON_LINK; > +ds_init(&config->dnssl); > > const char *address_mode = smap_get(&pb->options, > "ipv6_ra_address_mode"); > if (!address_mode) { > @@ -2298,6 +2301,11 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > } > config->has_rdnss = !!rdnss; > > +const char *dnssl = smap_get(&pb->options, "ipv6_ra_dnssl"); > +if (dnssl) { > +ds_put_buffer(&config->dnssl, dnssl, strlen(dnssl)); > +} > + > return config; > > fail: > @@ -2354,6 +2362,45 @@ packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t > num, >prev_l4_size + len * > 8)); > } > > +static void > +packet_put_ra_dnssl_opt(struct dp_packet *b, ovs_be32 lifetime, > +char *dnssl_list) > +{ > +char *t0, *r0 = dnssl_list, dnssl[255] = {}; > +size_t prev_l4_size = dp_packet_l4_size(b); > +size_t size = sizeof(struct ovs_nd_dnssl); > +struct ip6_hdr *nh = dp_packet_l3(b); > +int i = 0; > + > +while ((t0 = strtok_r(r0, ",", &r0))) { Are we not suppose to pass NULL as the first argument to subsequent calls of strtok_r ? Is there a chance that we might cross dnssl 255 length and have a segfault ? Can you please add comments on how the dnssl string is to be encoded ? Since the code here, is already handling for multiple dnssl values in the "dnssl_list", we can just accept multiple values of dnssl right ? Right now the patch is restricting to just 1 dnssl for the CMS to configure. Thanks Numan > +char *t1, *r1 = t0; > + > +size += strlen(t0) + 2; > +while ((t1 = strtok_r(r1, ".", &r1))) { > +dnssl[i++] = strlen(t1); > +memcpy(&dnssl[i], t1, strlen(t1)); > +i += strlen(t1); > +} > +dnssl[i++] = 0; > +} > +size = ROUND_UP(size, 8); > +nh->ip6_plen = htons(prev_l4_size + size); > + > +struct ovs_nd_dnssl *nd_dnssl = dp_packet_put_uninit(b, sizeof > *nd_dnssl); > +nd_dnssl->type = ND_OPT_DNSSL; > +nd_dnssl->len = size / 8; > +nd_dnssl->reserved = 0; > +put_16aligned_be32(&nd_dnssl->lifetime, lifetime); > + > +dp_packet_put(b, dnssl, size - sizeof *nd_dnssl); > + > +struct ovs_ra_msg *ra = dp_packet_l4(b); > +ra->icmph.icmp6_cksum = 0; > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, > + prev_l4_size + size)); > +} > + > /* Called with in the pinctrl_handler thread context. */ > static long long int > ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) > @@ -2382,6 +2429,10 @@ ipv6_ra_send(struct rconn *swconn, struct > ipv6_ra_state *ra) > packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), > &ra->config->rdnss); > } > +if (ra->config->dnssl.length) { > +packet_put_ra_dnssl_opt(&packet, htonl(0x), > +ra->config->dnssl.string); > +} > > uint64_t ofpacts_stub[4096 / 8]; > struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > index 12cee6329..5fc370bf5 100644 > --- a/lib/ovn-l7.h > +++ b/lib/ovn-l7.h > @@ -231,6 +231,17 @@ struct nd_rdnss_opt { > }; > BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt)); > > +/* DNSSL option RFC 6106 */ > +#define ND_OPT_DNSSL31 > +#define ND_DNSSL_OPT_L
Re: [ovs-dev] [PATCH v4 ovn 1/2] Add RDNSS support to OVN
On Mon, Oct 28, 2019 at 8:09 PM Lorenzo Bianconi wrote: > > Introduce the possibility to specify a RDNSS option to Router > Advertisement packets. DNS IPv6 address can be specified using > 'rdnss' tag in the ipv6_ra_configs column of logical router > port table > > Acked-by: Mark Michelson > Signed-off-by: Lorenzo Bianconi Thanks. I applied this patch to master. Numan > --- > controller/pinctrl.c | 39 +++ > lib/ovn-l7.h | 11 +++ > northd/ovn-northd.c | 5 + > ovn-nb.xml | 5 + > tests/ovn.at | 27 +++ > 5 files changed, 79 insertions(+), 8 deletions(-) > > diff --git a/controller/pinctrl.c b/controller/pinctrl.c > index d826da186..f105ebb5c 100644 > --- a/controller/pinctrl.c > +++ b/controller/pinctrl.c > @@ -2192,6 +2192,8 @@ struct ipv6_ra_config { > uint8_t mo_flags; /* Managed/Other flags for RAs */ > uint8_t la_flags; /* On-link/autonomous flags for address prefixes */ > struct lport_addresses prefixes; > +struct in6_addr rdnss; > +bool has_rdnss; > }; > > struct ipv6_ra_state { > @@ -2289,6 +2291,12 @@ ipv6_ra_update_config(const struct sbrec_port_binding > *pb) > VLOG_WARN("Invalid IP source %s", ip_addr); > goto fail; > } > +const char *rdnss = smap_get(&pb->options, "ipv6_ra_rdnss"); > +if (rdnss && !ipv6_parse(rdnss, &config->rdnss)) { > +VLOG_WARN("Invalid RDNSS source %s", rdnss); > +goto fail; > +} > +config->has_rdnss = !!rdnss; > > return config; > > @@ -2319,6 +2327,33 @@ put_load(uint64_t value, enum mf_field_id dst, int > ofs, int n_bits, > bitwise_one(ofpact_set_field_mask(sf), sf->field->n_bytes, ofs, n_bits); > } > > +static void > +packet_put_ra_rdnss_opt(struct dp_packet *b, uint8_t num, > +ovs_be32 lifetime, const struct in6_addr *dns) > +{ > +size_t prev_l4_size = dp_packet_l4_size(b); > +struct ip6_hdr *nh = dp_packet_l3(b); > +size_t len = 2 * num + 1; > + > +nh->ip6_plen = htons(prev_l4_size + len * 8); > + > +struct nd_rdnss_opt *nd_rdnss = dp_packet_put_uninit(b, sizeof > *nd_rdnss); > +nd_rdnss->type = ND_OPT_RDNSS; > +nd_rdnss->len = len; > +nd_rdnss->reserved = 0; > +put_16aligned_be32(&nd_rdnss->lifetime, lifetime); > + > +for (int i = 0; i < num; i++) { > +dp_packet_put(b, &dns[i], sizeof(ovs_be32[4])); > +} > + > +struct ovs_ra_msg *ra = dp_packet_l4(b); > +ra->icmph.icmp6_cksum = 0; > +uint32_t icmp_csum = packet_csum_pseudoheader6(dp_packet_l3(b)); > +ra->icmph.icmp6_cksum = csum_finish(csum_continue(icmp_csum, ra, > + prev_l4_size + len * > 8)); > +} > + > /* Called with in the pinctrl_handler thread context. */ > static long long int > ipv6_ra_send(struct rconn *swconn, struct ipv6_ra_state *ra) > @@ -2343,6 +2378,10 @@ ipv6_ra_send(struct rconn *swconn, struct > ipv6_ra_state *ra) > ra->config->la_flags, > htonl(IPV6_ND_RA_OPT_PREFIX_VALID_LIFETIME), > htonl(IPV6_ND_RA_OPT_PREFIX_PREFERRED_LIFETIME), addr); > } > +if (ra->config->has_rdnss) { > +packet_put_ra_rdnss_opt(&packet, 1, htonl(0x), > +&ra->config->rdnss); > +} > > uint64_t ofpacts_stub[4096 / 8]; > struct ofpbuf ofpacts = OFPBUF_STUB_INITIALIZER(ofpacts_stub); > diff --git a/lib/ovn-l7.h b/lib/ovn-l7.h > index c93def450..12cee6329 100644 > --- a/lib/ovn-l7.h > +++ b/lib/ovn-l7.h > @@ -220,6 +220,17 @@ struct dhcpv6_opt_ia_na { > ovs_be32 t2; > }); > > +/* RDNSS option RFC 6106 */ > +#define ND_RDNSS_OPT_LEN8 > +#define ND_OPT_RDNSS25 > +struct nd_rdnss_opt { > +uint8_t type; /* ND_OPT_RDNSS. */ > +uint8_t len; /* >= 3. */ > +ovs_be16 reserved;/* Always 0. */ > +ovs_16aligned_be32 lifetime; > +}; > +BUILD_ASSERT_DECL(ND_RDNSS_OPT_LEN == sizeof(struct nd_rdnss_opt)); > + > #define DHCPV6_DUID_LL 3 > #define DHCPV6_HW_TYPE_ETH 1 > > diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c > index ea8ad7c2d..d1de36e08 100644 > --- a/northd/ovn-northd.c > +++ b/northd/ovn-northd.c > @@ -6485,6 +6485,11 @@ copy_ra_to_sb(struct ovn_port *op, const char > *address_mode) > smap_add(&options, "ipv6_ra_prefixes", ds_cstr(&s)); > ds_destroy(&s); > > +const char *rdnss = smap_get(&op->nbrp->ipv6_ra_configs, "rdnss"); > +if (rdnss) { > +smap_add(&options, "ipv6_ra_rdnss", rdnss); > +} > + > smap_add(&options, "ipv6_ra_src_eth", op->lrp_networks.ea_s); > > sbrec_port_binding_set_options(op->sb, &options); > diff --git a/ovn-nb.xml b/ovn-nb.xml > index 1504f8fca..2faf9390b 100644 > --- a/ovn-nb.xml > +++ b/ovn-nb.xml > @@ -1885,6 +1885,11 @@ > is one-third of , > i.e. 200 seconds if that key is unset. > > + > + > +IPv6 address of RDNS