[ovs-dev] [PATCH v5] conntrack: support default timeout policy get/set cmd for netdev datapath

2021-12-14 Thread wenxu
From: wenxu 

Now, the default timeout policy for netdev datapath is hard codeing. In
some case show or modify is needed.
Add command for get/set default timeout policy. Using like this:

ovs-appctl dpctl/ct-get-default-tp [dp]
ovs-appctl dpctl/ct-set-default-tp [dp] policies

Signed-off-by: wenxu 
---
 NEWS |   4 ++
 lib/conntrack-tp.c   |  45 +---
 lib/ct-dpif.c| 107 +++
 lib/ct-dpif.h|  12 +
 lib/dpctl.c  |  70 +
 lib/dpif-netdev.c|  20 
 lib/dpif-netlink.c   |   2 +
 lib/dpif-provider.h  |   7 +++
 tests/system-kmod-macros.at  |  10 
 tests/system-traffic.at  |  67 
 tests/system-userspace-macros.at |   7 +++
 11 files changed, 308 insertions(+), 43 deletions(-)

diff --git a/NEWS b/NEWS
index 2a4856c..dea1044 100644
--- a/NEWS
+++ b/NEWS
@@ -23,6 +23,10 @@ Post-v2.16.0
  * Default selection method for select groups with up to 256 buckets is
now dp_hash.  Previously this was limited to 64 buckets.  This change
is mainly for the benefit of OVN load balancing configurations.
+   - ovs-appctl dpctl/:
+ * New commands 'ct-set-default-tp' and
+   'ct-set-default-tp' that allows to get or configure
+   netdev datapath ct default timeout policy.
 
 
 v2.16.0 - 16 Aug 2021
diff --git a/lib/conntrack-tp.c b/lib/conntrack-tp.c
index a586d3a..9988327 100644
--- a/lib/conntrack-tp.c
+++ b/lib/conntrack-tp.c
@@ -25,12 +25,6 @@
 VLOG_DEFINE_THIS_MODULE(conntrack_tp);
 static struct vlog_rate_limit rl = VLOG_RATE_LIMIT_INIT(1, 5);
 
-static const char *ct_timeout_str[] = {
-#define CT_TIMEOUT(NAME) #NAME,
-CT_TIMEOUTS
-#undef CT_TIMEOUT
-};
-
 /* Default timeout policy in seconds. */
 static unsigned int ct_dpif_netdev_tp_def[] = {
 [CT_DPIF_TP_ATTR_TCP_SYN_SENT] = 30,
@@ -195,41 +189,6 @@ timeout_policy_update(struct conntrack *ct,
 return err;
 }
 
-static enum ct_dpif_tp_attr
-tm_to_ct_dpif_tp(enum ct_timeout tm)
-{
-switch (tm) {
-case CT_TM_TCP_FIRST_PACKET:
-return CT_DPIF_TP_ATTR_TCP_SYN_SENT;
-case CT_TM_TCP_OPENING:
-return CT_DPIF_TP_ATTR_TCP_SYN_RECV;
-case CT_TM_TCP_ESTABLISHED:
-return CT_DPIF_TP_ATTR_TCP_ESTABLISHED;
-case CT_TM_TCP_CLOSING:
-return CT_DPIF_TP_ATTR_TCP_FIN_WAIT;
-case CT_TM_TCP_FIN_WAIT:
-return CT_DPIF_TP_ATTR_TCP_TIME_WAIT;
-case CT_TM_TCP_CLOSED:
-return CT_DPIF_TP_ATTR_TCP_CLOSE;
-case CT_TM_OTHER_FIRST:
-return CT_DPIF_TP_ATTR_UDP_FIRST;
-case CT_TM_OTHER_BIDIR:
-return CT_DPIF_TP_ATTR_UDP_MULTIPLE;
-case CT_TM_OTHER_MULTIPLE:
-return CT_DPIF_TP_ATTR_UDP_SINGLE;
-case CT_TM_ICMP_FIRST:
-return CT_DPIF_TP_ATTR_ICMP_FIRST;
-case CT_TM_ICMP_REPLY:
-return CT_DPIF_TP_ATTR_ICMP_REPLY;
-case N_CT_TM:
-default:
-OVS_NOT_REACHED();
-break;
-}
-OVS_NOT_REACHED();
-return CT_DPIF_TP_ATTR_MAX;
-}
-
 static void
 conn_update_expiration__(struct conntrack *ct, struct conn *conn,
  enum ct_timeout tm, long long now,
@@ -276,7 +235,7 @@ conn_update_expiration(struct conntrack *ct, struct conn 
*conn,
 ovs_mutex_lock(&conn->lock);
 VLOG_DBG_RL(&rl, "Update timeout %s zone=%u with policy id=%d "
 "val=%u sec.",
-ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
+ct_dpif_timeout_string[tm], conn->key.zone, conn->tp_id, val);
 
 conn_update_expiration__(ct, conn, tm, now, val);
 }
@@ -307,7 +266,7 @@ conn_init_expiration(struct conntrack *ct, struct conn 
*conn,
 }
 
 VLOG_DBG_RL(&rl, "Init timeout %s zone=%u with policy id=%d val=%u sec.",
-ct_timeout_str[tm], conn->key.zone, conn->tp_id, val);
+ct_dpif_timeout_string[tm], conn->key.zone, conn->tp_id, val);
 
 conn_init_expiration__(ct, conn, tm, now, val);
 }
diff --git a/lib/ct-dpif.c b/lib/ct-dpif.c
index cfc2315..986f7fb 100644
--- a/lib/ct-dpif.c
+++ b/lib/ct-dpif.c
@@ -20,6 +20,7 @@
 #include 
 
 #include "ct-dpif.h"
+#include "conntrack-private.h"
 #include "openvswitch/ofp-parse.h"
 #include "openvswitch/vlog.h"
 
@@ -180,6 +181,24 @@ ct_dpif_get_tcp_seq_chk(struct dpif *dpif, bool *enabled)
 }
 
 int
+ct_dpif_set_default_timeout_policy(struct dpif *dpif,
+   struct ct_dpif_timeout_policy *tp)
+{
+return (dpif->dpif_class->ct_set_timeout_policy
+? dpif->dpif_class->ct_set_default_timeout_policy(dpif, tp)
+: EOPNOTSUPP);
+}
+
+int
+ct_dpif_get_default_timeout_policy(struct dpif *dpif,
+   struct ct_dpif_timeout_policy *tp)
+{
+return (dpif->dpif_class->ct_get_timeout_policy
+? dpif->dpif_class->ct_get_default_timeout_policy(dpif, tp)
+

Re: [ovs-dev] [PATCH] dpif-netdev: Fix the autovalidator output for the miniflow extract.

2021-12-14 Thread Amber, Kumar
Hi Ilya,

Reviewed and Tested the Patch looks good. Thanks for the Fixes .

> -Original Message-
> From: Ilya Maximets 
> Sent: Tuesday, December 14, 2021 3:49 AM
> To: ovs-dev@openvswitch.org
> Cc: Stokes, Ian ; Amber, Kumar
> ; Ferriter, Cian ; Van
> Haaren, Harry ; Ilya Maximets
> 
> Subject: [PATCH] dpif-netdev: Fix the autovalidator output for the miniflow
> extract.
> 
> The autovalidator uses incorrect block count while printing the miniflow 
> buffer
> from a tested implementation.  This results in not printing fields that was
> incorrectly added to the miniflow or printing more of a buffer even if not
> needed.
> 
> Fix that by requesting and using the correct block count.
> 
> Also fixed the output formatting issues: extra spaces, characters, unclear
> relations between names and numbers due to mixed up delimiters, '%u' used for
> uint16_t, '\t' in the output, etc.
> 
> Fixes: dd3f5d86d983 ("dpif-netdev: Add auto validation function for miniflow
> extract")
> Signed-off-by: Ilya Maximets 
> ---
>  lib/dpif-netdev-private-extract.c | 26 +++---
>  1 file changed, 15 insertions(+), 11 deletions(-)
> 
> diff --git a/lib/dpif-netdev-private-extract.c 
> b/lib/dpif-netdev-private-extract.c
> index 7a06dbf6f..5f29861b8 100644
> --- a/lib/dpif-netdev-private-extract.c
> +++ b/lib/dpif-netdev-private-extract.c
> @@ -295,8 +295,8 @@ dpif_miniflow_extract_autovalidator(struct
> dp_packet_batch *packets,
>  if ((keys[i].mf.map.bits[0] != test_keys[i].mf.map.bits[0]) ||
>  (keys[i].mf.map.bits[1] != test_keys[i].mf.map.bits[1])) {
>  ds_put_format(&log_msg, "Autovalidation map failed\n"
> -  "Good 0x%llx 0x%llx\tTest 0x%llx"
> -  " 0x%llx\n", keys[i].mf.map.bits[0],
> +  "Good: 0x%llx 0x%llxTest: 0x%llx 0x%llx\n",
> +  keys[i].mf.map.bits[0],
>keys[i].mf.map.bits[1],
>test_keys[i].mf.map.bits[0],
>test_keys[i].mf.map.bits[1]); @@ -305,13 
> +305,15 @@
> dpif_miniflow_extract_autovalidator(struct dp_packet_batch *packets,
> 
>  if (!miniflow_equal(&keys[i].mf, &test_keys[i].mf)) {
>  uint32_t block_cnt = miniflow_n_values(&keys[i].mf);
> +uint32_t test_block_cnt =
> + miniflow_n_values(&test_keys[i].mf);
> +
>  ds_put_format(&log_msg, "Autovalidation blocks failed\n"
> -  "nGood hex:\n");
> +  "Good hex:\n");
>  ds_put_hex_dump(&log_msg, &keys[i].buf, block_cnt * 8, 0,
>  false);
>  ds_put_format(&log_msg, "Test hex:\n");
> -ds_put_hex_dump(&log_msg, &test_keys[i].buf, block_cnt * 8, 
> 0,
> -false);
> +ds_put_hex_dump(&log_msg, &test_keys[i].buf,
> +test_block_cnt * 8, 0, false);
>  failed = 1;
>  }
> 
> @@ -320,14 +322,16 @@ dpif_miniflow_extract_autovalidator(struct
> dp_packet_batch *packets,
>  (packet->l2_5_ofs != good_l2_5_ofs[i]) ||
>  (packet->l3_ofs != good_l3_ofs[i]) ||
>  (packet->l4_ofs != good_l4_ofs[i])) {
> -ds_put_format(&log_msg, "Autovalidation packet offsets 
> failed"
> -  "\n");
> -ds_put_format(&log_msg, "Good offsets: l2_pad_size %u,"
> -  " l2_5_ofs : %u l3_ofs %u, l4_ofs %u\n",
> +ds_put_format(&log_msg,
> +  "Autovalidation packet offsets failed\n");
> +ds_put_format(&log_msg, "Good offsets: "
> +  "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
> +  "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
>good_l2_pad_size[i], good_l2_5_ofs[i],
>good_l3_ofs[i], good_l4_ofs[i]);
> -ds_put_format(&log_msg, "  Test offsets: l2_pad_size %u,"
> -  " l2_5_ofs : %u l3_ofs %u, l4_ofs %u\n",
> +ds_put_format(&log_msg, "Test offsets: "
> +  "l2_pad_size: %"PRIu16", l2_5_ofs: %"PRIu16", "
> +  "l3_ofs: %"PRIu16", l4_ofs: %"PRIu16"\n",
>packet->l2_pad_size, packet->l2_5_ofs,
>packet->l3_ofs, packet->l4_ofs);
>  failed = 1;
> --
> 2.31.1

Acked-by: Kumar Amber 

Regards
Amber

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] dpcls: Change info-get function to fetch dpcls usage stats.

2021-12-14 Thread Amber, Kumar
Hi Eelco,

Thanks for the reviews .

We created 2 patches one which I sent here, and one is almost identical to the 
patch suggested by you,
We thought the double pointer was more complicated and hence didn’t send that 
one first.

I will send the updated patch with your Input in sometime 😊.

Regards
Amber

> -Original Message-
> From: Eelco Chaudron 
> Sent: Friday, December 10, 2021 6:49 PM
> To: Amber, Kumar 
> Cc: ovs-dev@openvswitch.org; f...@sysclose.org; Stokes, Ian
> ; Van Haaren, Harry 
> Subject: Re: [PATCH v4] dpcls: Change info-get function to fetch dpcls usage
> stats.
> 
> 
> 
> On 2 Dec 2021, at 11:05, Kumar Amber wrote:
> 
> > Modified the dplcs info-get command output to include the count for
> > different dpcls implementations.
> >
> > $ovs-appctl dpif-netdev/subtable-lookup-prio-get
> >
> > Available dpcls implementations:
> >   autovalidator (Use count: 1, Priority: 5)
> >   generic (Use count: 0, Priority: 1)
> >   avx512_gather (Use count: 0, Priority: 3)
> >
> > Test case to verify changes:
> > 1021: PMD - dpcls configuration ok
> >
> > Signed-off-by: Kumar Amber 
> > Signed-off-by: Harry van Haaren 
> > Co-authored-by: Harry van Haaren 
> >
> > ---
> > v4:
> > - Fix comments on the patch.
> > - Change API from an overloaded method of counting, to returning the
> >   old and new subtable structs. This allows the caller to identify the
> >   modified subtable implementations, and update the statistics accordingly.
> > v3:
> > - Fix comments on the patch.
> > - Function API remains same, see discussion on OVS ML here:
> >   "https://mail.openvswitch.org/pipermail/ovs-dev/2021-
> October/388737.html"
> > v2:
> > - Dependency merged rebased to master.
> >
> > ---
> > ---
> >  Documentation/topics/dpdk/bridge.rst | 16 +++
> >  lib/dpif-netdev-lookup.c | 70 +++-
> >  lib/dpif-netdev-lookup.h | 20 +++-
> >  lib/dpif-netdev.c| 61 +---
> >  tests/pmd.at | 16 +++
> >  5 files changed, 137 insertions(+), 46 deletions(-)
> >
> > diff --git a/Documentation/topics/dpdk/bridge.rst
> > b/Documentation/topics/dpdk/bridge.rst
> > index f645b9ade..63a54da1c 100644
> > --- a/Documentation/topics/dpdk/bridge.rst
> > +++ b/Documentation/topics/dpdk/bridge.rst
> > @@ -156,10 +156,10 @@ OVS provides multiple implementations of dpcls.
> > The following command enables  the user to check what implementations are
> available in a running instance ::
> >
> >  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
> > -Available lookup functions (priority : name)
> > -0 : autovalidator
> > -1 : generic
> > -0 : avx512_gather
> > +Available dpcls implementations:
> > +autovalidator (Use count: 1, Priority: 5)
> > +generic (Use count: 0, Priority: 1)
> > +avx512_gather (Use count: 0, Priority: 3)
> >
> >  To set the priority of a lookup function, run the ``prio-set`` command ::
> >
> > @@ -172,10 +172,10 @@ function due to the command being run. To verify
> > the prioritization, re-run the  get command, note the updated priority of 
> > the
> ``avx512_gather`` function ::
> >
> >  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
> > -Available lookup functions (priority : name)
> > -0 : autovalidator
> > -1 : generic
> > -5 : avx512_gather
> > +Available dpcls implementations:
> > +autovalidator (Use count: 0, Priority: 0)
> > +generic (Use count: 0, Priority: 0)
> > +avx512_gather (Use count: 1, Priority: 5)
> >
> >  If two lookup functions have the same priority, the first one in the
> > list is  chosen, and the 2nd occurance of that priority is not used.
> > Put in logical diff --git a/lib/dpif-netdev-lookup.c
> > b/lib/dpif-netdev-lookup.c index bd0a99abe..0aa79e27c 100644
> > --- a/lib/dpif-netdev-lookup.c
> > +++ b/lib/dpif-netdev-lookup.c
> > @@ -36,18 +36,21 @@ static struct dpcls_subtable_lookup_info_t
> subtable_lookups[] = {
> >  { .prio = 0,
> >  #endif
> >.probe = dpcls_subtable_autovalidator_probe,
> > -  .name = "autovalidator", },
> > +  .name = "autovalidator",
> > +  .usage_cnt = ATOMIC_COUNT_INIT(0), },
> >
> >  /* The default scalar C code implementation. */
> >  { .prio = 1,
> >.probe = dpcls_subtable_generic_probe,
> > -  .name = "generic", },
> > +  .name = "generic",
> > +  .usage_cnt = ATOMIC_COUNT_INIT(0), },
> >
> >  #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD &&
> __SSE4_2__)
> >  /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. 
> > */
> >  { .prio = 0,
> >.probe = dpcls_subtable_avx512_gather_probe,
> > -  .name = "avx512_gather", },
> > +  .name = "avx512_gather",
> > +  .usage_cnt = ATOMIC_COUNT_INIT(0), },
> >  #else
> >  /* Disabling AVX512 at compile time, as compile

Re: [ovs-dev] [PATCH v4] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) info logs.

2021-12-14 Thread Roi Dayan via dev




On 2021-12-09 3:16 PM, Ilya Maximets wrote:

On 12/8/21 03:57, lin huang wrote:

From: linhuang 

Userspace tunnel doesn't have a valid device in the kernel. So
get_ifindex() function (ioctl) always get error during
adding a port, deleting a port or updating a port status.

The info log is
"2021-08-29T09:17:39.830Z|00059|netdev_linux|INFO|ioctl(SIOCGIFINDEX)
on vxlan_sys_4789 device failed: No such device"

If there are a lot of userspace tunnel ports on a bridge, the
iface_refresh_netdev_status() function will spend a lot of time.

So ignore userspace tunnel port ioctl(SIOCGIFINDEX) operation, just
return -ENODEV.

Signed-off-by: Lin Huang 
Test-by: Mike Pattrick 
---
  lib/netdev-vport.c | 6 ++
  vswitchd/bridge.c  | 2 ++
  2 files changed, 8 insertions(+)



Applied.  Thanks!

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=04%7C01%7Croid%40nvidia.com%7C8865bff084424563a30308d9bb16357f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637746526329615023%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=53XCrfcyaY1orDpnmjdf7FV5espcWKPElCMDJKVU5Jw%3D&reserved=0



Hi,

We encounter an offload issue with this commit that OVS doesn't add
rules to TC and keeps on with OVS dp.

To reproduce the issue, configure ovs with bridge and 2 ports,
restart openvswitch service and then do ping between the ports.
Removing/readding port to bridge or calling dpctl/dump-flows "fix" the
issue and ovs calls TC again.

From first look it's because br->ofproto->type is not the same pointer
usually set in netdev_ports_insert() which can be dpi_class->type or
static "system" and in the netdev_ports_* functions the code uses
pointer equal instead of strcmp() to check the type.
So netdev_ports_lookup() returns error now.

2021-12-14T10:56:18.240Z|00010|dpif_netlink(handler1)|ERR|XXX 
parse_flow_put 2243 get port for type system ret (nil)


Even after changing all netdev_ports_* to use strcmp() and
netdev_ports_lookup() is ok and we do get to tc flow put.
Looks like we get an error from tc to offload. so checking this part
now.

2021-12-14T11:23:09.230Z|00035|dpif_netlink(handler1)|ERR|failed to 
offload flow: No such device: enp8s0f0_0


So still looking on this but wanted to share about the behavior.

Thanks,
Roi
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v4] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) info logs.

2021-12-14 Thread Roi Dayan via dev



On 2021-12-14 1:59 PM, Roi Dayan wrote:



On 2021-12-09 3:16 PM, Ilya Maximets wrote:

On 12/8/21 03:57, lin huang wrote:

From: linhuang 

Userspace tunnel doesn't have a valid device in the kernel. So
get_ifindex() function (ioctl) always get error during
adding a port, deleting a port or updating a port status.

The info log is
"2021-08-29T09:17:39.830Z|00059|netdev_linux|INFO|ioctl(SIOCGIFINDEX)
on vxlan_sys_4789 device failed: No such device"

If there are a lot of userspace tunnel ports on a bridge, the
iface_refresh_netdev_status() function will spend a lot of time.

So ignore userspace tunnel port ioctl(SIOCGIFINDEX) operation, just
return -ENODEV.

Signed-off-by: Lin Huang 
Test-by: Mike Pattrick 
---
  lib/netdev-vport.c | 6 ++
  vswitchd/bridge.c  | 2 ++
  2 files changed, 8 insertions(+)



Applied.  Thanks!

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=04%7C01%7Croid%40nvidia.com%7C8865bff084424563a30308d9bb16357f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637746526329615023%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=53XCrfcyaY1orDpnmjdf7FV5espcWKPElCMDJKVU5Jw%3D&reserved=0



Hi,

We encounter an offload issue with this commit that OVS doesn't add
rules to TC and keeps on with OVS dp.

To reproduce the issue, configure ovs with bridge and 2 ports,
restart openvswitch service and then do ping between the ports.
Removing/readding port to bridge or calling dpctl/dump-flows "fix" the
issue and ovs calls TC again.

 From first look it's because br->ofproto->type is not the same pointer
usually set in netdev_ports_insert() which can be dpi_class->type or
static "system" and in the netdev_ports_* functions the code uses
pointer equal instead of strcmp() to check the type.
So netdev_ports_lookup() returns error now.

2021-12-14T10:56:18.240Z|00010|dpif_netlink(handler1)|ERR|XXX 
parse_flow_put 2243 get port for type system ret (nil)


Even after changing all netdev_ports_* to use strcmp() and
netdev_ports_lookup() is ok and we do get to tc flow put.
Looks like we get an error from tc to offload. so checking this part
now.

2021-12-14T11:23:09.230Z|00035|dpif_netlink(handler1)|ERR|failed to 
offload flow: No such device: enp8s0f0_0


So still looking on this but wanted to share about the behavior.

Thanks,
Roi


I forgot to mention but just to make it clear,
need to set hw-offload=true.

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH 1/2] netdev-dpdk: Fix statistics when changing Rx/Tx queues count.

2021-12-14 Thread Kevin Traynor

On 02/12/2021 21:16, David Marchand wrote:

When changing number of Rx or Tx queues, per queue basic stats can be
renumbered in DPDK ethdev layer [1].

OVS maintains an internal xstats IDs cache that was refreshed when a
cached id was not valid anymore (in netdev_dpdk_get_custom_stats) or if
a new DPDK port was created.
This did not handle changes of Rx/Tx queues count.

For example, with a mlx5 port:
$ ovs-vsctl set interface dpdk0 options:n_rxq=2
$ ovs-vsctl get interface dpdk0 statistics |
   sed -e 's#[{}]##g' -e 's#, #\n#g' |
   grep rx_q._errors
rx_q0_errors=0

Move the cache filling after reconfiguring and starting the port.
There is no need to flush the cache in netdev_dpdk_get_custom_stats.

While at it, the xstats code can be cleaned up:
- remove wrong or Lapalissade comments,
- don't check x*alloc return value,
- expect that consecutive calls to xstats API return the same number of
   elements,
- only write to dev-> when all DPDK calls succeeded,
- add missing lock annotations to netdev_dpdk_clear_xstats and
   netdev_dpdk_get_xstat_name,

1: https://git.dpdk.org/dpdk/tree/lib/librte_ethdev/rte_ethdev.c?h=v20.11#n2696

Reported-at: 
https://mail.openvswitch.org/pipermail/ovs-dev/2021-November/389456.html
Signed-off-by: David Marchand 


I tested it and the rx q errors were reported for the right number of qs 
when increasing/decreasing num of qs. Code lgtm.


Acked-by: Kevin Traynor 

As a side note, I noticed that xstats only reported for q0-q15, but as 
we discussed that is a DPDK configuration item.



---
  lib/netdev-dpdk.c | 160 ++
  1 file changed, 62 insertions(+), 98 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index ca92c947a2..51bb41551b 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -540,6 +540,7 @@ static void netdev_dpdk_vhost_destruct(struct netdev 
*netdev);
  
  static int netdev_dpdk_get_sw_custom_stats(const struct netdev *,

 struct netdev_custom_stats *);
+static void netdev_dpdk_configure_xstats(struct netdev_dpdk *dev);
  static void netdev_dpdk_clear_xstats(struct netdev_dpdk *dev);
  
  int netdev_dpdk_get_vid(const struct netdev_dpdk *dev);

@@ -1161,6 +1162,8 @@ dpdk_eth_dev_init(struct netdev_dpdk *dev)
  }
  dev->started = true;
  
+netdev_dpdk_configure_xstats(dev);

+
  rte_eth_promiscuous_enable(dev->port_id);
  rte_eth_allmulticast_enable(dev->port_id);
  
@@ -1559,23 +1562,19 @@ netdev_dpdk_dealloc(struct netdev *netdev)
  
  static void

  netdev_dpdk_clear_xstats(struct netdev_dpdk *dev)
+OVS_REQUIRES(dev->mutex)
  {
-/* If statistics are already allocated, we have to
- * reconfigure, as port_id could have been changed. */
-if (dev->rte_xstats_names) {
-free(dev->rte_xstats_names);
-dev->rte_xstats_names = NULL;
-dev->rte_xstats_names_size = 0;
-}
-if (dev->rte_xstats_ids) {
-free(dev->rte_xstats_ids);
-dev->rte_xstats_ids = NULL;
-dev->rte_xstats_ids_size = 0;
-}
+free(dev->rte_xstats_names);
+dev->rte_xstats_names = NULL;
+dev->rte_xstats_names_size = 0;
+free(dev->rte_xstats_ids);
+dev->rte_xstats_ids = NULL;
+dev->rte_xstats_ids_size = 0;
  }
  
-static const char*

+static const char *
  netdev_dpdk_get_xstat_name(struct netdev_dpdk *dev, uint64_t id)
+OVS_REQUIRES(dev->mutex)
  {
  if (id >= dev->rte_xstats_names_size) {
  return "UNKNOWN";
@@ -1583,101 +1582,70 @@ netdev_dpdk_get_xstat_name(struct netdev_dpdk *dev, 
uint64_t id)
  return dev->rte_xstats_names[id].name;
  }
  
-static bool

+static void
  netdev_dpdk_configure_xstats(struct netdev_dpdk *dev)
  OVS_REQUIRES(dev->mutex)
  {
+struct rte_eth_xstat_name *rte_xstats_names = NULL;
+struct rte_eth_xstat *rte_xstats = NULL;
+int rte_xstats_names_size;
  int rte_xstats_len;
-bool ret;
-struct rte_eth_xstat *rte_xstats;
-uint64_t id;
-int xstats_no;
  const char *name;
+uint64_t id;
  
-/* Retrieving all XSTATS names. If something will go wrong

- * or amount of counters will be equal 0, rte_xstats_names
- * buffer will be marked as NULL, and any further xstats
- * query won't be performed (e.g. during netdev_dpdk_get_stats
- * execution). */
+netdev_dpdk_clear_xstats(dev);
  
-ret = false;

-rte_xstats = NULL;
+rte_xstats_names_size = rte_eth_xstats_get_names(dev->port_id, NULL, 0);
+if (rte_xstats_names_size < 0) {
+VLOG_WARN("Cannot get XSTATS names for port: "DPDK_PORT_ID_FMT,
+  dev->port_id);
+goto out;
+}
  
-if (dev->rte_xstats_names == NULL || dev->rte_xstats_ids == NULL) {

-dev->rte_xstats_names_size =
-rte_eth_xstats_get_names(dev->port_id, NULL, 0);
+rte_xstats_names = xcalloc(rte_xstats_names_size,
+   sizeof *rte_xstats_names);
+rte_xstats_len = rt

Re: [ovs-dev] [PATCH v4] netdev-vport : Fix userspace tunnel ioctl(SIOCGIFINDEX) info logs.

2021-12-14 Thread Ilya Maximets
On 12/14/21 13:07, Roi Dayan wrote:
> 
> 
> On 2021-12-14 1:59 PM, Roi Dayan wrote:
>>
>>
>> On 2021-12-09 3:16 PM, Ilya Maximets wrote:
>>> On 12/8/21 03:57, lin huang wrote:
 From: linhuang 

 Userspace tunnel doesn't have a valid device in the kernel. So
 get_ifindex() function (ioctl) always get error during
 adding a port, deleting a port or updating a port status.

 The info log is
 "2021-08-29T09:17:39.830Z|00059|netdev_linux|INFO|ioctl(SIOCGIFINDEX)
 on vxlan_sys_4789 device failed: No such device"

 If there are a lot of userspace tunnel ports on a bridge, the
 iface_refresh_netdev_status() function will spend a lot of time.

 So ignore userspace tunnel port ioctl(SIOCGIFINDEX) operation, just
 return -ENODEV.

 Signed-off-by: Lin Huang 
 Test-by: Mike Pattrick 
 ---
   lib/netdev-vport.c | 6 ++
   vswitchd/bridge.c  | 2 ++
   2 files changed, 8 insertions(+)

>>>
>>> Applied.  Thanks!
>>>
>>> Best regards, Ilya Maximets.
>>> ___
>>> dev mailing list
>>> d...@openvswitch.org
>>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Fmail.openvswitch.org%2Fmailman%2Flistinfo%2Fovs-dev&data=04%7C01%7Croid%40nvidia.com%7C8865bff084424563a30308d9bb16357f%7C43083d15727340c1b7db39efd9ccc17a%7C0%7C0%7C637746526329615023%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000&sdata=53XCrfcyaY1orDpnmjdf7FV5espcWKPElCMDJKVU5Jw%3D&reserved=0
>>
>>
>> Hi,
>>
>> We encounter an offload issue with this commit that OVS doesn't add
>> rules to TC and keeps on with OVS dp.
>>
>> To reproduce the issue, configure ovs with bridge and 2 ports,
>> restart openvswitch service and then do ping between the ports.
>> Removing/readding port to bridge or calling dpctl/dump-flows "fix" the
>> issue and ovs calls TC again.
>>
>>  From first look it's because br->ofproto->type is not the same pointer
>> usually set in netdev_ports_insert() which can be dpi_class->type or
>> static "system" and in the netdev_ports_* functions the code uses
>> pointer equal instead of strcmp() to check the type.
>> So netdev_ports_lookup() returns error now.
>>
>> 2021-12-14T10:56:18.240Z|00010|dpif_netlink(handler1)|ERR|XXX parse_flow_put 
>> 2243 get port for type system ret (nil)
>>
>> Even after changing all netdev_ports_* to use strcmp() and
>> netdev_ports_lookup() is ok and we do get to tc flow put.
>> Looks like we get an error from tc to offload. so checking this part
>> now.
>>
>> 2021-12-14T11:23:09.230Z|00035|dpif_netlink(handler1)|ERR|failed to offload 
>> flow: No such device: enp8s0f0_0
>>
>> So still looking on this but wanted to share about the behavior.
>>
>> Thanks,
>> Roi
> 
> I forgot to mention but just to make it clear,
> need to set hw-offload=true.
> 

Ugh.  Sorry, my fault.  Looking at the patch again now and this version
seems weird.  I don't know WTF I was thinking.  The main thing is that
we're calling netdev_set_dpif_type() twice with different arguments.
First time with br->ofproto->type as a type and second time with
dpif_normalize_type(dpif_type(dpif)).  And these are not the same.
And as you correctly noticed br->ofproto->type is not a constant string
unlike the normalized one, hence cannot be checked by a pointer comparison.


Could you try this:

diff --git a/lib/dpif.c b/lib/dpif.c
index 38bcb47cb..cff0bc2db 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -364,7 +364,8 @@ do_open(const char *name, const char *type, bool create, 
struct dpif **dpifp)
 err = netdev_open(dpif_port.name, dpif_port.type, &netdev);
 
 if (!err) {
-netdev_ports_insert(netdev, dpif_type_str, &dpif_port);
+netdev_set_dpif_type(netdev, dpif_type_str);
+netdev_ports_insert(netdev, &dpif_port);
 netdev_close(netdev);
 } else {
 VLOG_WARN("could not open netdev %s type %s: %s",
@@ -602,10 +603,12 @@ dpif_port_add(struct dpif *dpif, struct netdev *netdev, 
odp_port_t *port_nop)
 const char *dpif_type_str = dpif_normalize_type(dpif_type(dpif));
 struct dpif_port dpif_port;
 
+netdev_set_dpif_type(netdev, dpif_type_str);
+
 dpif_port.type = CONST_CAST(char *, netdev_get_type(netdev));
 dpif_port.name = CONST_CAST(char *, netdev_name);
 dpif_port.port_no = port_no;
-netdev_ports_insert(netdev, dpif_type_str, &dpif_port);
+netdev_ports_insert(netdev, &dpif_port);
 }
 } else {
 VLOG_WARN_RL(&error_rl, "%s: failed to add %s as port: %s",
diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 8075cfbd8..b8dc108f3 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -573,12 +573,14 @@ netdev_ports_lookup(odp_port_t port_no, const char 
*dpif_type)
 }
 
 int
-netdev_ports_insert(struct netdev *netdev, const char *dpi

Re: [ovs-dev] [PATCH 2/2] netdev-dpdk: Expose per rxq/txq basic statistics.

2021-12-14 Thread Kevin Traynor

On 02/12/2021 21:16, David Marchand wrote:

When troubleshooting multiqueue setups, having per queue statistics helps
checking packets repartition in rx and tx queues.

Per queue statistics are exported by most DPDK drivers (with capability
RTE_ETH_DEV_AUTOFILL_QUEUE_XSTATS).
OVS only filters DPDK statistics, there is nothing to request in DPDK API.
So the only change is to extend the filter on xstats.

Querying statistics with
$ ovs-vsctl get interface dpdk0 statistics |
   sed -e 's#[{}]##g' -e 's#, #\n#g'

and comparing gives:
@@ -13,7 +13,12 @@
  rx_phy_crc_errors=0
  rx_phy_in_range_len_errors=0
  rx_phy_symbol_errors=0
+rx_q0_bytes=0
  rx_q0_errors=0
+rx_q0_packets=0
+rx_q1_bytes=0
  rx_q1_errors=0
+rx_q1_packets=0
  rx_wqe_errors=0
  tx_broadcast_packets=0
  tx_bytes=0
@@ -27,3 +32,13 @@
  tx_pp_rearm_queue_errors=0
  tx_pp_timestamp_future_errors=0
  tx_pp_timestamp_past_errors=0
+tx_q0_bytes=0
+tx_q0_packets=0
+tx_q1_bytes=0
+tx_q1_packets=0
+tx_q2_bytes=0
+tx_q2_packets=0
+tx_q3_bytes=0
+tx_q3_packets=0
+tx_q4_bytes=0
+tx_q4_packets=0

Signed-off-by: David Marchand 
Reviewed-by: Maxime Coquelin 


Tested and working when increasing/decreasing rx/tx queues.

'rx_q15_bytes=0, rx_q15_errors=0, rx_q15_packets=0'

Acked-by: Kevin Traynor 


---
Changes since RFC:
- dropped regex and used simpler string manipulations,

---
  lib/netdev-dpdk.c | 17 ++---
  1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 51bb41551b..1e6079d544 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -1582,6 +1582,16 @@ netdev_dpdk_get_xstat_name(struct netdev_dpdk *dev, 
uint64_t id)
  return dev->rte_xstats_names[id].name;
  }
  
+static bool

+is_queue_stat(const char *s)
+{
+uint16_t tmp;
+
+return (s[0] == 'r' || s[0] == 't') &&
+(ovs_scan(s + 1, "x_q%"SCNu16"_packets", &tmp) ||
+ ovs_scan(s + 1, "x_q%"SCNu16"_bytes", &tmp));
+}
+
  static void
  netdev_dpdk_configure_xstats(struct netdev_dpdk *dev)
  OVS_REQUIRES(dev->mutex)
@@ -1632,9 +1642,10 @@ netdev_dpdk_configure_xstats(struct netdev_dpdk *dev)
  id = rte_xstats[i].id;
  name = netdev_dpdk_get_xstat_name(dev, id);
  
-/* We need to filter out everything except dropped, error and

- * management counters. */
-if (string_ends_with(name, "_errors") ||
+/* For custom stats, we filter out everything except per rxq/txq basic
+ * stats, and dropped, error and management counters. */
+if (is_queue_stat(name) ||
+string_ends_with(name, "_errors") ||
  strstr(name, "_management_") ||
  string_ends_with(name, "_dropped")) {
  



___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 0/5] adding USDT points to OVS

2021-12-14 Thread Eelco Chaudron
This patchset introduces User Statically Defined Trace (USDT)
points to OVS. It included the general infrastructure,
documentation, and some example scripts.

This idea was introduced during the OVS conference 2021, more
information about the presentation can be found here:
http://www.openvswitch.org/support/ovscon2021/#T1

While waiting for the official conference video post, the
pre-recorded video can be found here: https://youtu.be/NrO0YbdTvbg

Eelco Chaudron (5):
  configure: add --enable-usdt option to enable USDT probes
  openvswitch: define the OVS_STATIC_TRACE() macro
  Documentation: add USDT documentation and bpftrace example
  utilities: add upcall USDT probe and associated script
  utilities: add netlink flow operation USDT probes and upcall_cost script


 Documentation/automake.mk|1 +
 Documentation/topics/index.rst   |1 +
 Documentation/topics/usdt-probes.rst |  379 +
 configure.ac |1 +
 include/openvswitch/automake.mk  |1 +
 include/openvswitch/usdt_probes.h|   50 +
 lib/dpif-netlink.c   |   16 +
 lib/dpif.c   |   23 +-
 m4/openvswitch.m4|   28 +
 utilities/automake.mk|   19 +-
 utilities/usdt_scripts/bridge_loop.bt|  120 ++
 utilities/usdt_scripts/upcall_cost.py| 1774 ++
 utilities/usdt_scripts/upcall_monitor.py |  500 ++
 vswitchd/ovs-vswitchd.c  |3 +
 14 files changed, 2901 insertions(+), 15 deletions(-)
 create mode 100644 Documentation/topics/usdt-probes.rst
 create mode 100644 include/openvswitch/usdt_probes.h
 create mode 100755 utilities/usdt_scripts/bridge_loop.bt
 create mode 100755 utilities/usdt_scripts/upcall_cost.py
 create mode 100755 utilities/usdt_scripts/upcall_monitor.py

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 1/5] configure: add --enable-usdt option to enable USDT probes

2021-12-14 Thread Eelco Chaudron
Allow inclusion of User Statically Defined Trace (USDT) probes
in the OVS binaries using the --enable-usdt option to the
./configure script.

Signed-off-by: Eelco Chaudron 
---
 configure.ac  |1 +
 m4/openvswitch.m4 |   28 
 2 files changed, 29 insertions(+)

diff --git a/configure.ac b/configure.ac
index eaa9bf7ee..3e72e28bf 100644
--- a/configure.ac
+++ b/configure.ac
@@ -88,6 +88,7 @@ OVS_CHECK_WIN32
 OVS_CHECK_VISUAL_STUDIO_DDK
 OVS_CHECK_COVERAGE
 OVS_CHECK_NDEBUG
+OVS_CHECK_USDT
 OVS_CHECK_NETLINK
 OVS_CHECK_OPENSSL
 OVS_CHECK_LIBCAPNG
diff --git a/m4/openvswitch.m4 b/m4/openvswitch.m4
index 772825a71..429f151d2 100644
--- a/m4/openvswitch.m4
+++ b/m4/openvswitch.m4
@@ -60,6 +60,34 @@ AC_DEFUN([OVS_CHECK_NDEBUG],
  [ndebug=false])
AM_CONDITIONAL([NDEBUG], [test x$ndebug = xtrue])])
 
+dnl Checks for --enable-usdt-probes and defines HAVE_USDT if it is specified.
+AC_DEFUN([OVS_CHECK_USDT], [
+  AC_ARG_ENABLE(
+[usdt-probes],
+[AC_HELP_STRING([--enable-usdt-probes],
+[Enable User Statically Defined Tracing(USDT) probes])],
+[case "${enableval}" in
+   (yes) usdt=true ;;
+   (no)  usdt=false ;;
+   (*) AC_MSG_ERROR([bad value ${enableval} for --enable-usdt-probes]) ;;
+ esac],
+[usdt=false])
+
+  AC_MSG_CHECKING([whether USDT probes are enabled])
+  if test "$usdt" != true; then
+AC_MSG_RESULT([no])
+  else
+AC_MSG_RESULT([yes])
+
+AC_CHECK_HEADER([sys/sdt.h], [],
+  [AC_MSG_ERROR([unable to find sys/sdt.h needed for USDT support])])
+
+AC_DEFINE([HAVE_USDT_PROBES], [1],
+  [Define to 1 if USDT probes are enabled.])
+  fi
+  AM_CONDITIONAL([HAVE_USDT_PROBES], [test $usdt = true])
+])
+
 dnl Checks for MSVC x64 compiler.
 AC_DEFUN([OVS_CHECK_WIN64],
   [AC_CACHE_CHECK(

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 2/5] openvswitch: define the OVS_STATIC_TRACE() macro

2021-12-14 Thread Eelco Chaudron
This patch defines the OVS_STATIC_TRACE() macro, and as an
example, adds two of them in the bridge run loop.

Signed-off-by: Eelco Chaudron 
---
 include/openvswitch/automake.mk   |1 +
 include/openvswitch/usdt_probes.h |   50 +
 vswitchd/ovs-vswitchd.c   |3 ++
 3 files changed, 54 insertions(+)
 create mode 100644 include/openvswitch/usdt_probes.h

diff --git a/include/openvswitch/automake.mk b/include/openvswitch/automake.mk
index 1fa6d88fa..317dc404f 100644
--- a/include/openvswitch/automake.mk
+++ b/include/openvswitch/automake.mk
@@ -43,6 +43,7 @@ openvswitchinclude_HEADERS = \
include/openvswitch/tun-metadata.h \
include/openvswitch/type-props.h \
include/openvswitch/types.h \
+   include/openvswitch/usdt_probes.h \
include/openvswitch/util.h \
include/openvswitch/uuid.h \
include/openvswitch/version.h \
diff --git a/include/openvswitch/usdt_probes.h 
b/include/openvswitch/usdt_probes.h
new file mode 100644
index 0..2f8882bd4
--- /dev/null
+++ b/include/openvswitch/usdt_probes.h
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2021 Red Hat, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef OPENVSWITCH_USDT_PROBES_H
+#define OPENVSWITCH_USDT_PROBES_H 1
+
+#ifdef HAVE_USDT_PROBES
+#include 
+#endif
+
+#ifdef  __cplusplus
+extern "C" {
+#endif
+
+#ifdef HAVE_USDT_PROBES
+
+#define GET_DTRACE_FUNC(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10,  \
+NAME, ...) NAME
+
+#define OVS_USDT_PROBE(provider, name, ...)   \
+GET_DTRACE_FUNC(_0, ##__VA_ARGS__, DTRACE_PROBE10, DTRACE_PROBE9, \
+DTRACE_PROBE8, DTRACE_PROBE7, DTRACE_PROBE6,  \
+DTRACE_PROBE5, DTRACE_PROBE4, DTRACE_PROBE3,  \
+DTRACE_PROBE2, DTRACE_PROBE1, DTRACE_PROBE)   \
+(provider, name, ##__VA_ARGS__)
+
+#else
+
+#define OVS_USDT_PROBE(...)
+
+#endif
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* usdt_probes.h */
diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c
index f007f9c0b..932a1ba0b 100644
--- a/vswitchd/ovs-vswitchd.c
+++ b/vswitchd/ovs-vswitchd.c
@@ -50,6 +50,7 @@
 #include "util.h"
 #include "openvswitch/vconn.h"
 #include "openvswitch/vlog.h"
+#include "openvswitch/usdt_probes.h"
 #include "lib/vswitch-idl.h"
 #include "lib/dns-resolve.h"
 
@@ -115,6 +116,7 @@ main(int argc, char *argv[])
 exiting = false;
 cleanup = false;
 while (!exiting) {
+OVS_USDT_PROBE(main, run_start);
 memory_run();
 if (memory_should_report()) {
 struct simap usage;
@@ -135,6 +137,7 @@ main(int argc, char *argv[])
 if (exiting) {
 poll_immediate_wake();
 }
+OVS_USDT_PROBE(main, poll_block);
 poll_block();
 if (should_service_stop()) {
 exiting = true;

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH 3/5] Documentation: add USDT documentation and bpftrace example

2021-12-14 Thread Eelco Chaudron
Add the USDT documentation and a bpftrace example using the
bridge run USDT probes.

Signed-off-by: Eelco Chaudron 
---
 Documentation/automake.mk |1 
 Documentation/topics/index.rst|1 
 Documentation/topics/usdt-probes.rst  |  267 +
 utilities/automake.mk |   13 +-
 utilities/usdt_scripts/bridge_loop.bt |  120 +++
 5 files changed, 396 insertions(+), 6 deletions(-)
 create mode 100644 Documentation/topics/usdt-probes.rst
 create mode 100755 utilities/usdt_scripts/bridge_loop.bt

diff --git a/Documentation/automake.mk b/Documentation/automake.mk
index 137cc57c5..9b9c72cff 100644
--- a/Documentation/automake.mk
+++ b/Documentation/automake.mk
@@ -58,6 +58,7 @@ DOC_SOURCE = \
Documentation/topics/record-replay.rst \
Documentation/topics/tracing.rst \
Documentation/topics/userspace-tso.rst \
+   Documentation/topics/usdt-probes.rst \
Documentation/topics/windows.rst \
Documentation/howto/index.rst \
Documentation/howto/dpdk.rst \
diff --git a/Documentation/topics/index.rst b/Documentation/topics/index.rst
index d8ccbd757..7ca364340 100644
--- a/Documentation/topics/index.rst
+++ b/Documentation/topics/index.rst
@@ -55,3 +55,4 @@ OVS
userspace-tso
idl-compound-indexes
ovs-extensions
+   usdt-probes
diff --git a/Documentation/topics/usdt-probes.rst 
b/Documentation/topics/usdt-probes.rst
new file mode 100644
index 0..38acf7d02
--- /dev/null
+++ b/Documentation/topics/usdt-probes.rst
@@ -0,0 +1,267 @@
+..
+  Licensed under the Apache License, Version 2.0 (the "License"); you may
+  not use this file except in compliance with the License. You may obtain
+  a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+  WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+  License for the specific language governing permissions and limitations
+  under the License.
+
+  Convention for heading levels in Open vSwitch documentation:
+
+  ===  Heading 0 (reserved for the title in a document)
+  ---  Heading 1
+  ~~~  Heading 2
+  +++  Heading 3
+  '''  Heading 4
+
+  Avoid deeper levels because they do not render well.
+
+=
+User Statically-Defined Tracing (USDT) probes
+=
+
+Sometimes it's desired to troubleshoot one of OVS's components in the field.
+One of the techniques used is to add dynamic tracepoints, for example using
+perf_. However, the desired dynamic tracepoint and/or the desired variable,
+might not be available due to compiler optimizations.
+
+In this case, a well-thought-off, static tracepoint could be permanently added,
+so it's always available. For OVS we use the DTrace probe macro's, which have
+little to no overhead when disabled. Various tools exist to enable them. See
+some examples below.
+
+
+Compiling with USDT probes enabled
+--
+
+Since USDT probes are compiled out by default, a compile-time option is
+available to include them. To add the probes to the generated code, use the
+following configure option ::
+
+$ ./configure --enable-usdt-probes
+
+The following line should be seen in the configure output when the above option
+is used ::
+
+checking whether USDT probes are enabled... yes
+
+
+Listing available probes
+
+
+There are various ways to display USDT probes available in a userspace
+application. Here we show three examples. All assuming ovs-vswitchd is in the
+search path with USDT probes enabled:
+
+You can use the **perf** tool as follows ::
+
+$ perf buildid-cache --add $(which ovs-vswitchd)
+$ perf list | grep sdt_
+  sdt_main:poll_block[SDT event]
+  sdt_main:run_start [SDT event]
+
+You can use the bpftrace_ tool ::
+
+# bpftrace -l "usdt:$(which ovs-vswitchd):*"
+usdt:/usr/sbin/ovs-vswitchd:main:poll_block
+usdt:/usr/sbin/ovs-vswitchd:main:run_start
+
+NOTE: If you execute this on a running process,
+``bpftrace -lp $(pidof ovs-vswitchd) "usdt:*"`` , it will list all USDT events,
+i.e., also the ones available in the used shared libraries.
+
+Finally, you can use the **tplist** tool which is part of the bcc_ framework ::
+
+$ /usr/share/bcc/tools/tplist -vv -l $(which ovs-vswitchd)
+b'main':b'poll_block' [sema 0x0]
+  location #1 b'/usr/sbin/ovs-vswitchd' 0x407fdc
+b'main':b'run_start' [sema 0x0]
+  location #1 b'/usr/sbin/ovs-vswitchd' 0x407ff6
+
+
+Using probes
+
+
+We will use the OVS sandbox environment in combination with the probes shown
+above to try out some of the availa

[ovs-dev] [PATCH 4/5] utilities: add upcall USDT probe and associated script

2021-12-14 Thread Eelco Chaudron
Added the dpif_recv:recv_upcall USDT probe, which is used by the
included upcall_monitor.py script. This script receives all upcall
packets sent by the kernel to ovs-vswitchd. By default, it will
show all  upcall events, which looks something like this:

 TIME   CPU  COMM  PID  DPIF_NAME  TYPE PKT_LEN 
FLOW_KEY_LEN
 5952147.003848809  2handler4  1381158  system@ovs-system  098  132
 5952147.003879643  2handler4  1381158  system@ovs-system  070  160
 5952147.003914924  2handler4  1381158  system@ovs-system  098  152

It can also dump the packet and NetLink content, and if required,
the packets can also be written to a pcap file.

Signed-off-by: Eelco Chaudron 
---
 Documentation/topics/usdt-probes.rst |   26 ++
 lib/dpif.c   |   23 +
 utilities/automake.mk|6 
 utilities/usdt_scripts/upcall_monitor.py |  500 ++
 4 files changed, 545 insertions(+), 10 deletions(-)
 create mode 100755 utilities/usdt_scripts/upcall_monitor.py

diff --git a/Documentation/topics/usdt-probes.rst 
b/Documentation/topics/usdt-probes.rst
index 38acf7d02..1f6446e12 100644
--- a/Documentation/topics/usdt-probes.rst
+++ b/Documentation/topics/usdt-probes.rst
@@ -200,10 +200,36 @@ used naming convention.
 
 Available probes in ``ovs_vswitchd``:
 
+- dpif_recv:recv_upcall
 - main:poll_block
 - main:run_start
 
 
+probe dpif_recv:recv_upcall
+~~~
+
+**Description**:
+
+This probe gets triggered when the datapath independent layer gets notified
+that a packet needs to be processed by userspace. This allows the probe to
+intercept all packets sent by the kernel to ``ovs-vswitchd``. The
+``upcall_monitor.py`` script uses this probe to display and capture all packets
+sent to ``ovs-vswitchd``.
+
+**Arguments**:
+
+- *arg0*: ``(struct dpif *)->full_name``
+- *arg1*: ``(struct dpif_upcall *)->type``
+- *arg2*: ``dp_packet_data((struct dpif_upcall *)->packet)``
+- *arg3*: ``dp_packet_size((struct dpif_upcall *)->packet)``
+- *arg4*: ``(struct dpif_upcall *)->key``
+- *arg5*: ``(struct dpif_upcall *)->key_len``
+
+**Script references**:
+
+- ``/utilities/usdt_scripts/upcall_monitor.py``
+
+
 probe main:run_start
 
 
diff --git a/lib/dpif.c b/lib/dpif.c
index 38bcb47cb..ab75b3c17 100644
--- a/lib/dpif.c
+++ b/lib/dpif.c
@@ -15,7 +15,6 @@
  */
 
 #include 
-#include "dpif-provider.h"
 
 #include 
 #include 
@@ -24,22 +23,19 @@
 #include 
 
 #include "coverage.h"
-#include "dpctl.h"
 #include "dp-packet.h"
+#include "dpctl.h"
 #include "dpif-netdev.h"
-#include "openvswitch/dynamic-string.h"
+#include "dpif-provider.h"
 #include "flow.h"
+#include "netdev-provider.h"
 #include "netdev.h"
 #include "netlink.h"
 #include "odp-execute.h"
 #include "odp-util.h"
-#include "openvswitch/ofp-print.h"
-#include "openvswitch/ofpbuf.h"
 #include "packets.h"
-#include "openvswitch/poll-loop.h"
 #include "route-table.h"
 #include "seq.h"
-#include "openvswitch/shash.h"
 #include "sset.h"
 #include "timeval.h"
 #include "tnl-neigh-cache.h"
@@ -47,9 +43,14 @@
 #include "util.h"
 #include "uuid.h"
 #include "valgrind.h"
+#include "openvswitch/dynamic-string.h"
 #include "openvswitch/ofp-errors.h"
+#include "openvswitch/ofp-print.h"
+#include "openvswitch/ofpbuf.h"
+#include "openvswitch/poll-loop.h"
+#include "openvswitch/shash.h"
+#include "openvswitch/usdt_probes.h"
 #include "openvswitch/vlog.h"
-#include "lib/netdev-provider.h"
 
 VLOG_DEFINE_THIS_MODULE(dpif);
 
@@ -1602,6 +1603,12 @@ dpif_recv(struct dpif *dpif, uint32_t handler_id, struct 
dpif_upcall *upcall,
 if (dpif->dpif_class->recv) {
 error = dpif->dpif_class->recv(dpif, handler_id, upcall, buf);
 if (!error) {
+OVS_USDT_PROBE(dpif_recv, recv_upcall, dpif->full_name,
+   upcall->type,
+   dp_packet_data(&upcall->packet),
+   dp_packet_size(&upcall->packet),
+   upcall->key, upcall->key_len);
+
 dpif_print_packet(dpif, upcall);
 } else if (error != EAGAIN) {
 log_operation(dpif, "recv", error);
diff --git a/utilities/automake.mk b/utilities/automake.mk
index 55c7b0022..382f8e789 100644
--- a/utilities/automake.mk
+++ b/utilities/automake.mk
@@ -62,7 +62,8 @@ EXTRA_DIST += \
utilities/docker/create_ovs_db.sh \
utilities/docker/debian/Dockerfile \
utilities/docker/debian/build-kernel-modules.sh \
-   utilities/usdt_scripts/bridge_loop.bt
+   utilities/usdt_scripts/bridge_loop.bt \
+   utilities/usdt_scripts/upcall_monitor.py
 MAN_ROOTS += \
utilities/ovs-testcontroller.8.in \
utilities/ovs-dpctl.8.in \
@@ -129,6 +130,7 @@ FLAKE8_PYFILES += utilities/ovs-pcap.in \
utilities/checkpatch.py utilities/ovs-dev.py \
utilities/ovs-check-dead-ifs.in \
utilities/ovs-tcpdump.in \
-   utilitie

[ovs-dev] [PATCH 5/5] utilities: add netlink flow operation USDT probes and upcall_cost script

2021-12-14 Thread Eelco Chaudron
This patch adds a series of NetLink flow operation USDT probes.
These probes are in turn used in the upcall_cost Python script,
which in addition of some kernel tracepoints, give an insight into
the time spent on processing upcall.

Signed-off-by: Eelco Chaudron 
---
 Documentation/topics/usdt-probes.rst  |   86 ++
 lib/dpif-netlink.c|   16 
 utilities/automake.mk |4 
 utilities/usdt_scripts/upcall_cost.py | 1774 +
 4 files changed, 1879 insertions(+), 1 deletion(-)
 create mode 100755 utilities/usdt_scripts/upcall_cost.py

diff --git a/Documentation/topics/usdt-probes.rst 
b/Documentation/topics/usdt-probes.rst
index 1f6446e12..cfe57f1a3 100644
--- a/Documentation/topics/usdt-probes.rst
+++ b/Documentation/topics/usdt-probes.rst
@@ -200,11 +200,96 @@ used naming convention.
 
 Available probes in ``ovs_vswitchd``:
 
+- dpif_netlink_operate\_\_:op_flow_del
+- dpif_netlink_operate\_\_:op_flow_execute
+- dpif_netlink_operate\_\_:op_flow_get
+- dpif_netlink_operate\_\_:op_flow_put
 - dpif_recv:recv_upcall
 - main:poll_block
 - main:run_start
 
 
+dpif_netlink_operate\_\_:op_flow_del
+
+
+**Description**:
+
+This probe gets triggered when the Netlink datapath is about to execute the
+DPIF_OP_FLOW_DEL operation as part of the dpif ``operate()`` callback.
+
+**Arguments**:
+
+- *arg0*: ``(struct dpif_netlink *) dpif``
+- *arg1*: ``(struct dpif_flow_del *) del``
+- *arg2*: ``(struct dpif_netlink_flow *) flow``
+- *arg3*: ``(struct ofpbuf *) aux->request``
+
+**Script references**:
+
+- *None*
+
+
+dpif_netlink_operate\_\_:op_flow_execute
+
+
+**Description**:
+
+This probe gets triggered when the Netlink datapath is about to execute the
+DPIF_OP_FLOW_EXECUTE operation as part of the dpif ``operate()`` callback.
+
+**Arguments**:
+
+- *arg0*: ``(struct dpif_netlink *) dpif``
+- *arg1*: ``(struct dpif_execute *) op->execute``
+- *arg2*: ``dp_packet_data(op->execute.packet)``
+- *arg3*: ``dp_packet_size(op->execute.packet)``
+- *arg4*: ``(struct ofpbuf *) aux->request``
+
+**Script references**:
+
+- ``/utilities/usdt_scripts/upcall_cost.py``
+
+
+dpif_netlink_operate\_\_:op_flow_get
+
+
+**Description**:
+
+This probe gets triggered when the Netlink datapath is about to execute the
+DPIF_OP_FLOW_GET operation as part of the dpif ``operate()`` callback.
+
+**Arguments**:
+
+- *arg0*: ``(struct dpif_netlink *) dpif``
+- *arg1*: ``(struct dpif_flow_get *) get``
+- *arg2*: ``(struct dpif_netlink_flow *) flow``
+- *arg3*: ``(struct ofpbuf *) aux->request``
+
+**Script references**:
+
+- *None*
+
+
+dpif_netlink_operate\_\_:op_flow_put
+
+
+**Description**:
+
+This probe gets triggered when the Netlink datapath is about to execute the
+DPIF_OP_FLOW_PUT operation as part of the dpif ``operate()`` callback.
+
+**Arguments**:
+
+- *arg0*: ``(struct dpif_netlink *) dpif``
+- *arg1*: ``(struct dpif_flow_put *) put``
+- *arg2*: ``(struct dpif_netlink_flow *) flow``
+- *arg3*: ``(struct ofpbuf *) aux->request``
+
+**Script references**:
+
+- ``/utilities/usdt_scripts/upcall_cost.py``
+
+
 probe dpif_recv:recv_upcall
 ~~~
 
@@ -227,6 +312,7 @@ sent to ``ovs-vswitchd``.
 
 **Script references**:
 
+- ``/utilities/usdt_scripts/upcall_cost.py``
 - ``/utilities/usdt_scripts/upcall_monitor.py``
 
 
diff --git a/lib/dpif-netlink.c b/lib/dpif-netlink.c
index 424a28401..52c229568 100644
--- a/lib/dpif-netlink.c
+++ b/lib/dpif-netlink.c
@@ -56,6 +56,7 @@
 #include "openvswitch/poll-loop.h"
 #include "openvswitch/shash.h"
 #include "openvswitch/thread.h"
+#include "openvswitch/usdt_probes.h"
 #include "openvswitch/vlog.h"
 #include "packets.h"
 #include "random.h"
@@ -2052,6 +2053,9 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 aux->txn.reply = &aux->reply;
 }
 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
+
+OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_put,
+   dpif, put, &flow, &aux->request);
 break;
 
 case DPIF_OP_FLOW_DEL:
@@ -2062,6 +2066,9 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 aux->txn.reply = &aux->reply;
 }
 dpif_netlink_flow_to_ofpbuf(&flow, &aux->request);
+
+OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_del,
+   dpif, del, &flow, &aux->request);
 break;
 
 case DPIF_OP_EXECUTE:
@@ -2082,6 +2089,12 @@ dpif_netlink_operate__(struct dpif_netlink *dpif,
 } else {
 dpif_netlink_encode_execute(dpif->dp_ifindex, &op->execute,
 &aux->request);
+
+OVS_USDT_PROBE(dpif_netlink_operate__, op_flow_execute,
+   dpif, &op->execute,
+   dp_p

Re: [ovs-dev] [PATCH v3 2/4] system-dpdk: Use dummy-pmd port for packet injection.

2021-12-14 Thread David Marchand
On Mon, Dec 6, 2021 at 11:16 PM Ilya Maximets  wrote:
>
> On 11/30/21 16:00, David Marchand wrote:
> > net_pcap is not always available in DPDK (like, in a dev
> > environment when you forgot to install the libpcap-devel).
> > On the other hand, OVS already has its own way to inject packets into a
> > bridge. Let's make use of it.
> >
> > This solution is slower than net/pcap DPDK, so lower the number of
> > expected packets so that it runs in OVS_CTL_TIMEOUT seconds.
> >
> > While at it, convert "known" packets from pcap to scapy so that the
> > injected packets can be updated without having to read/write a pcap file.
> >
> > Note: this change also (avoids/) fixes a python exception in PcapWriter
> > with scapy 2.4.3 that comes from EPEL.
> >
> > Suggested-by: Ilya Maximets 
> > Signed-off-by: David Marchand 
> > ---
> > Changes since v2:
> > - updated documentation,
> > - cleaned tests/automake.mk,
> > - fixed shebang in python script,
> > - added missing check for scapy availability,
> >
> > Changes since v1:
> > - renamed generator script,
> > - decreased packet count for fuzzy test,
> > - simplified wait expression for packet count,
> >
> > ---
> >  Documentation/topics/dpdk/bridge.rst |   7 ++--
> >  tests/automake.mk|   7 +---
> >  tests/genpkts.py |  56 +++
> >  tests/mfex_fuzzy.py  |  32 ---
> >  tests/pcap/mfex_test.pcap| Bin 416 -> 0 bytes
> >  tests/system-dpdk-macros.at  |   4 +-
> >  tests/system-dpdk.at |  33 +---
> >  7 files changed, 89 insertions(+), 50 deletions(-)
> >  create mode 100755 tests/genpkts.py
> >  delete mode 100755 tests/mfex_fuzzy.py
> >  delete mode 100644 tests/pcap/mfex_test.pcap
> >
> > diff --git a/Documentation/topics/dpdk/bridge.rst 
> > b/Documentation/topics/dpdk/bridge.rst
> > index f645b9ade5..648ce203eb 100644
> > --- a/Documentation/topics/dpdk/bridge.rst
> > +++ b/Documentation/topics/dpdk/bridge.rst
> > @@ -408,7 +408,7 @@ Fuzzy tests can also be done on miniflow extract with 
> > the help of
> >  auto-validator and Scapy. The steps below describes the steps to
> >  reproduce the setup with IP being fuzzed to generate packets.
> >
> > -Scapy is used to create fuzzy IP packets and save them into a PCAP ::
> > +Scapy is used to create fuzzy IP packets (see tests/genpkts.py) ::
> >
> >  pkt = fuzz(Ether()/IP()/TCP())
> >
> > @@ -418,9 +418,8 @@ Set the miniflow extract to autovalidator using ::
> >
> >  OVS is configured to receive the generated packets ::
> >
> > -$ ovs-vsctl add-port br0 pcap0 -- \
> > -set Interface pcap0 type=dpdk options:dpdk-devargs=net_pcap0
> > -"rx_pcap=fuzzy.pcap"
> > +$ ovs-vsctl add-port br0 p1 -- \
> > +set Interface p1 type=dummy-pmd
> >
> >  With this workflow, the autovalidator will ensure that all MFEX
> >  implementations are classifying each packet in exactly the same way.
>
> I'm actually getting a hard time understanding a value of this section
> of the doc for the end user, who is the target audience for this doc.
>
> And the next section "Unit Fuzzy test with Autovalidator" is incorrectly
> formatted and also almost to the word equal to the "Unit Test Miniflow
> Extract" section.
>
> It doesn't make a lot of sense talking that functionality is covered by
> unit tests.  Everything should be covered by unit tests by default.
> And this is definitely not something that should be part of an end-user
> high-level documentation.
>
> It's not a problem of a current patch set, but I'd suggest to just
> remove the last 3 sections of dpdk/bridge.rst.  They should not be there.

I guess this comment is covered by your patch that refactors the
documentation, correct?
I'll still update this part in my patch (and rebase if there is a
conflict once your doc patch is merged).


>
> > diff --git a/tests/automake.mk b/tests/automake.mk
> > index 43731d0973..3bc74a5aff 100644
> > --- a/tests/automake.mk
> > +++ b/tests/automake.mk
> > @@ -143,11 +143,6 @@ $(srcdir)/tests/fuzz-regression-list.at: 
> > tests/automake.mk
> >   echo "TEST_FUZZ_REGRESSION([$$basename])"; \
> >   done > $@.tmp && mv $@.tmp $@
> >
> > -EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
> > -MFEX_AUTOVALIDATOR_TESTS = \
> > - tests/pcap/mfex_test.pcap \
> > - tests/mfex_fuzzy.py
> > -
> >  OVSDB_CLUSTER_TESTSUITE_AT = \
> >   tests/ovsdb-cluster-testsuite.at \
> >   tests/ovsdb-execution.at \
> > @@ -518,7 +513,7 @@ tests_test_type_props_SOURCES = tests/test-type-props.c
> >  CHECK_PYFILES = \
> >   tests/appctl.py \
> >   tests/flowgen.py \
> > - tests/mfex_fuzzy.py \
> > + tests/genpkts.py \
> >   tests/ovsdb-monitor-sort.py \
> >   tests/test-daemon.py \
> >   tests/test-json.py \
> > diff --git a/tests/genpkts.py b/tests/genpkts.py
> > new file mode 100755
> > index 00..f64f786ccb
> > --- /dev/null
> > +++ b/tests/genpkts.py
> > @@ -0

Re: [ovs-dev] [PATCH 3/5] Documentation: add USDT documentation and bpftrace example

2021-12-14 Thread 0-day Robot
Bleep bloop.  Greetings Eelco Chaudron, I am a robot and I have tried out your 
patch.
Thanks for your contribution.

I encountered some error that I wasn't expecting.  See the details below.


checkpatch:
WARNING: Line is 108 characters long (recommended limit is 79)
#310 FILE: Documentation/topics/usdt-probes.rst:264:
.. _perf : 
https://developers.redhat.com/blog/2020/05/29/debugging-vhost-user-tx-contention-in-open-vswitch#

Lines checked: 466, Warnings: 1, Errors: 0


Please check this out.  If you feel there has been an error, please email 
acon...@redhat.com

Thanks,
0-day Robot
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] docs: Re-work the documentation around CPU ISA optimizations.

2021-12-14 Thread Ilya Maximets
On 12/13/21 18:19, David Marchand wrote:
> On Fri, Dec 10, 2021 at 8:36 PM Ilya Maximets  wrote:
>>
>> Few problems with a current documentation:
>>
>> 1. bridge.rst is the high-level documentation for the end user.
>>Unit testing and complex implementation details are for developers,
>>hence should not be there.  Testing instructions for developers
>>should be in testing.rst.  Words in the doc should be understandable
>>for the user who doesn't know OVS internals.
>>
>> 2. Some paragraphs in the current documentation are repeating each
>>other almost to the word.
>>
>> 3. Some paragraphs are incorrectly formatted.  That affects the
>>rendering.
>>
>> 4. There is no point describing every separate test of a system-dpdk
>>testsuite.
>>
>> What is done:
>>
>> 1. All the testing related paragraphs are consolidated and moved
>>to the testing.rst.
>>
>> 2. Most of abbreviations replaced with more readable and understandable
>>for the end user words.
>>
>> 3. Meaning or the purpose of several sentences I failed to understand,
>>therefore just deleted.
>>
>> 4. Fixed formatting and a few typos along the way.
>>
>> IMO, some parts of the doc still needs some re-wording, but this change
>> provides at least a starting point for improvement setting a better
>> structure for the document.
> 
> +1.
> 
>>
>> Signed-off-by: Ilya Maximets 
> 
> I don't mind if you drop some of my suggestion / nits (see inline ),
> this patch enhances the documentation anyway.
> 
> Reviewed-by: David Marchand 

Thanks, David!  See replies inline.

> 
> 
>> ---
>>  Documentation/topics/dpdk/bridge.rst | 248 ++-
>>  Documentation/topics/testing.rst |  59 ++-
>>  2 files changed, 113 insertions(+), 194 deletions(-)
>>
>> diff --git a/Documentation/topics/dpdk/bridge.rst 
>> b/Documentation/topics/dpdk/bridge.rst
>> index f645b9ade..d9cc86268 100644
>> --- a/Documentation/topics/dpdk/bridge.rst
>> +++ b/Documentation/topics/dpdk/bridge.rst
>> @@ -153,7 +153,7 @@ In OVS v2.14 runtime CPU detection was introduced to 
>> enable identifying if
>>  these CPU ISA additions are available, and to allow the user to enable them.
>>
>>  OVS provides multiple implementations of dpcls. The following command 
>> enables
>> -the user to check what implementations are available in a running instance 
>> ::
>> +the user to check what implementations are available in a running instance::
>>
>>  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
>>  Available lookup functions (priority : name)
>> @@ -161,7 +161,7 @@ the user to check what implementations are available in 
>> a running instance ::
>>  1 : generic
>>  0 : avx512_gather
>>
>> -To set the priority of a lookup function, run the ``prio-set`` command ::
>> +To set the priority of a lookup function, run the ``prio-set`` command::
>>
>>  $ ovs-appctl dpif-netdev/subtable-lookup-prio-set avx512_gather 5
>>  Lookup priority change affected 1 dpcls ports and 1 subtables.
>> @@ -169,7 +169,7 @@ To set the priority of a lookup function, run the 
>> ``prio-set`` command ::
>>  The highest priority lookup function is used for classification, and the 
>> output
>>  above indicates that one subtable of one DPCLS port is has changed its 
>> lookup
>>  function due to the command being run. To verify the prioritization, re-run 
>> the
>> -get command, note the updated priority of the ``avx512_gather`` function ::
>> +get command, note the updated priority of the ``avx512_gather`` function::
>>
>>  $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
>>  Available lookup functions (priority : name)
>> @@ -185,24 +185,24 @@ best candidate.
>>  Optimizing Specific Subtable Search
>>  ~~~
>>
>> -During the packet classification, the datapath can use specialized
>> -lookup tables to optimize the search. However, not all situations
>> -are optimized. If you see a message like the following one in the OVS
>> -logs, it means that there is no specialized implementation available
>> -for the current networking traffic. In this case, OVS will continue
>> -to process the traffic normally using a more generic lookup table."
>> +During the packet classification, the datapath can use specialized lookup
>> +tables to optimize the search. However, not all situations are optimized.  
>> If
> 
> Nit: I would add a space before However.

ok.

> 
> 
>> +you see a message like the following one in the OVS logs, it means that 
>> there
>> +is no specialized implementation available for the current networking 
>> traffic.
>> +In this case, OVS will continue to process the traffic normally using a more
>> +generic lookup table.::
> 
> I don't really get the intention for this part.
> 
> This is rendered as "generic lookup table.:" and I don't get the
> relation with the sentence below.
> So I would simply end this sentence above with a .


I think, the sentences should be swapped.  Something l

Re: [ovs-dev] [PATCH v3 2/4] system-dpdk: Use dummy-pmd port for packet injection.

2021-12-14 Thread Ilya Maximets
On 12/14/21 16:38, David Marchand wrote:
> On Mon, Dec 6, 2021 at 11:16 PM Ilya Maximets  wrote:
>>
>> On 11/30/21 16:00, David Marchand wrote:
>>> net_pcap is not always available in DPDK (like, in a dev
>>> environment when you forgot to install the libpcap-devel).
>>> On the other hand, OVS already has its own way to inject packets into a
>>> bridge. Let's make use of it.
>>>
>>> This solution is slower than net/pcap DPDK, so lower the number of
>>> expected packets so that it runs in OVS_CTL_TIMEOUT seconds.
>>>
>>> While at it, convert "known" packets from pcap to scapy so that the
>>> injected packets can be updated without having to read/write a pcap file.
>>>
>>> Note: this change also (avoids/) fixes a python exception in PcapWriter
>>> with scapy 2.4.3 that comes from EPEL.
>>>
>>> Suggested-by: Ilya Maximets 
>>> Signed-off-by: David Marchand 
>>> ---
>>> Changes since v2:
>>> - updated documentation,
>>> - cleaned tests/automake.mk,
>>> - fixed shebang in python script,
>>> - added missing check for scapy availability,
>>>
>>> Changes since v1:
>>> - renamed generator script,
>>> - decreased packet count for fuzzy test,
>>> - simplified wait expression for packet count,
>>>
>>> ---
>>>  Documentation/topics/dpdk/bridge.rst |   7 ++--
>>>  tests/automake.mk|   7 +---
>>>  tests/genpkts.py |  56 +++
>>>  tests/mfex_fuzzy.py  |  32 ---
>>>  tests/pcap/mfex_test.pcap| Bin 416 -> 0 bytes
>>>  tests/system-dpdk-macros.at  |   4 +-
>>>  tests/system-dpdk.at |  33 +---
>>>  7 files changed, 89 insertions(+), 50 deletions(-)
>>>  create mode 100755 tests/genpkts.py
>>>  delete mode 100755 tests/mfex_fuzzy.py
>>>  delete mode 100644 tests/pcap/mfex_test.pcap
>>>
>>> diff --git a/Documentation/topics/dpdk/bridge.rst 
>>> b/Documentation/topics/dpdk/bridge.rst
>>> index f645b9ade5..648ce203eb 100644
>>> --- a/Documentation/topics/dpdk/bridge.rst
>>> +++ b/Documentation/topics/dpdk/bridge.rst
>>> @@ -408,7 +408,7 @@ Fuzzy tests can also be done on miniflow extract with 
>>> the help of
>>>  auto-validator and Scapy. The steps below describes the steps to
>>>  reproduce the setup with IP being fuzzed to generate packets.
>>>
>>> -Scapy is used to create fuzzy IP packets and save them into a PCAP ::
>>> +Scapy is used to create fuzzy IP packets (see tests/genpkts.py) ::
>>>
>>>  pkt = fuzz(Ether()/IP()/TCP())
>>>
>>> @@ -418,9 +418,8 @@ Set the miniflow extract to autovalidator using ::
>>>
>>>  OVS is configured to receive the generated packets ::
>>>
>>> -$ ovs-vsctl add-port br0 pcap0 -- \
>>> -set Interface pcap0 type=dpdk options:dpdk-devargs=net_pcap0
>>> -"rx_pcap=fuzzy.pcap"
>>> +$ ovs-vsctl add-port br0 p1 -- \
>>> +set Interface p1 type=dummy-pmd
>>>
>>>  With this workflow, the autovalidator will ensure that all MFEX
>>>  implementations are classifying each packet in exactly the same way.
>>
>> I'm actually getting a hard time understanding a value of this section
>> of the doc for the end user, who is the target audience for this doc.
>>
>> And the next section "Unit Fuzzy test with Autovalidator" is incorrectly
>> formatted and also almost to the word equal to the "Unit Test Miniflow
>> Extract" section.
>>
>> It doesn't make a lot of sense talking that functionality is covered by
>> unit tests.  Everything should be covered by unit tests by default.
>> And this is definitely not something that should be part of an end-user
>> high-level documentation.
>>
>> It's not a problem of a current patch set, but I'd suggest to just
>> remove the last 3 sections of dpdk/bridge.rst.  They should not be there.
> 
> I guess this comment is covered by your patch that refactors the
> documentation, correct?

Yep.

> I'll still update this part in my patch (and rebase if there is a
> conflict once your doc patch is merged).

OK.  And we might not even need a rebase, as rebase would mean just
dropping the documentation changes, AFAICT.  And that's not hard to
do while applying the patch.

> 
> 
>>
>>> diff --git a/tests/automake.mk b/tests/automake.mk
>>> index 43731d0973..3bc74a5aff 100644
>>> --- a/tests/automake.mk
>>> +++ b/tests/automake.mk
>>> @@ -143,11 +143,6 @@ $(srcdir)/tests/fuzz-regression-list.at: 
>>> tests/automake.mk
>>>   echo "TEST_FUZZ_REGRESSION([$$basename])"; \
>>>   done > $@.tmp && mv $@.tmp $@
>>>
>>> -EXTRA_DIST += $(MFEX_AUTOVALIDATOR_TESTS)
>>> -MFEX_AUTOVALIDATOR_TESTS = \
>>> - tests/pcap/mfex_test.pcap \
>>> - tests/mfex_fuzzy.py
>>> -
>>>  OVSDB_CLUSTER_TESTSUITE_AT = \
>>>   tests/ovsdb-cluster-testsuite.at \
>>>   tests/ovsdb-execution.at \
>>> @@ -518,7 +513,7 @@ tests_test_type_props_SOURCES = tests/test-type-props.c
>>>  CHECK_PYFILES = \
>>>   tests/appctl.py \
>>>   tests/flowgen.py \
>>> - tests/mfex_fuzzy.py \
>>> + tests/genpkts.py \
>>>   tests/ovsd

Re: [ovs-dev] [PATCH net v2 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-14 Thread Paul Blakey via dev



On Sat, 11 Dec 2021, Jakub Kicinski wrote:

> On Thu, 9 Dec 2021 09:57:33 +0200 Paul Blakey wrote:
> > @@ -238,10 +239,12 @@ void
> >  skb_flow_dissect_ct(const struct sk_buff *skb,
> > struct flow_dissector *flow_dissector,
> > void *target_container, u16 *ctinfo_map,
> > -   size_t mapsize, bool post_ct)
> > +   size_t mapsize)
> >  {
> >  #if IS_ENABLED(CONFIG_NF_CONNTRACK)
> > +   bool post_ct = tc_skb_cb(skb)->post_ct;
> > struct flow_dissector_key_ct *key;
> > +   u16 zone = tc_skb_cb(skb)->zone;
> > enum ip_conntrack_info ctinfo;
> > struct nf_conn_labels *cl;
> > struct nf_conn *ct;
> > @@ -260,6 +263,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
> > if (!ct) {
> > key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
> > TCA_FLOWER_KEY_CT_FLAGS_INVALID;
> > +   key->ct_zone = zone;
> > return;
> > }
> >  
> 
> Why is flow dissector expecting skb cb to be TC now?
> Please keep the appropriate abstractions intact.
> 

Will do. Sending v3.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] docs: Re-work the documentation around CPU ISA optimizations.

2021-12-14 Thread David Marchand
On Tue, Dec 14, 2021 at 4:55 PM Ilya Maximets  wrote:
> >> +you see a message like the following one in the OVS logs, it means that 
> >> there
> >> +is no specialized implementation available for the current networking 
> >> traffic.
> >> +In this case, OVS will continue to process the traffic normally using a 
> >> more
> >> +generic lookup table.::
> >
> > I don't really get the intention for this part.
> >
> > This is rendered as "generic lookup table.:" and I don't get the
> > relation with the sentence below.
> > So I would simply end this sentence above with a .
>
>
> I think, the sentences should be swapped.  Something like this:
>
> ...
> If you see a message like the following one in the OVS logs, it means that 
> there
> is no specialized implementation available for the current network traffic::
>
>   Using non-specialized AVX512 lookup for subtable (X,Y) and possibly others.
>
> In this case, OVS will continue to process the traffic normally using a more
> generic lookup table.
> ...
>
> And the (Note ...) part below can actually be removed.
>
> What do you think?

Ah ok.
This is better as you propose.


>
> >
> >
> >> -"Using non-specialized AVX512 lookup for subtable (4,1) and possibly 
> >> others."
> >> +  Using non-specialized AVX512 lookup for subtable (4,1) and possibly 
> >> others.
> >
> > And make this a sub paragraph title instead.
> >
> >
> >>
> >>  (Note that the numbers 4 and 1 will likely be different in your logs)
> >>
> >> -Additional specialized lookups can be added to OVS if the user
> >> -provides that log message along with the command output as show
> >> -below to the OVS mailing list. Note that the numbers in the log
> >> -message ("subtable (X,Y)") need to match with the numbers in
> >> -the provided command output ("dp-extra-info:miniflow_bits(X,Y)").
> >> +Additional specialized lookups can be added to OVS if the user provides 
> >> that
> >> +log message along with the command output as show below to the OVS mailing
> >> +list.  Note that the numbers in the log message (``subtable (X,Y)``) need 
> >> to
> >> +match with the numbers in the provided command output
> >> +(``dp-extra-info:miniflow_bits(X,Y)``).
> >>
> >> -"ovs-appctl dpctl/dump-flows -m", which results in output like this:
> >> +``ovs-appctl dpctl/dump-flows -m``, which results in output like this::
> >>
> >>  ufid:82770b5d-ca38-44ff-8283-74ba36bd1ca5, 
> >> skb_priority(0/0),skb_mark(0/0)
> >>  ,ct_state(0/0),ct_zone(0/0),ct_mark(0/0),ct_label(0/0),recirc_id(0),

[snip]

> >> diff --git a/Documentation/topics/testing.rst 
> >> b/Documentation/topics/testing.rst
> >> index ea11700e3..85797df07 100644
> >> --- a/Documentation/topics/testing.rst
> >> +++ b/Documentation/topics/testing.rst
> >> @@ -356,7 +356,64 @@ The phy test will skip if no compatible physical 
> >> device is available.
> >>  .. _Configure hugepages: 
> >> https://doc.dpdk.org/guides-21.11/linux_gsg/sys_reqs.html
> >>
> >>  All the features documented under `Unit Tests`_ are available for the DPDK
> >> -datapath testsuite.
> >> +testsuite.
> >> +
> >> +Userspace datapath: CPU ISA Testing and Validation
> >> +''
> >
> > It could be worth rewording (or in a followup patch?): this block is
> > not about testing CPU ISA, but about testing optimisations of some
> > parts of OVS.
>
>
> Userspace datapath: Testing and Validation of CPU-specific Optimizations
>
> ?

+1

Thanks.


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 0/3] net/sched: Fix ct zone matching for invalid conntrack state

2021-12-14 Thread Paul Blakey via dev
Hi,

Currently, when a packet is marked as invalid conntrack_in in act_ct,
post_ct will be set, and connection info (nf_conn) will be removed
from the skb. Later openvswitch and flower matching will parse this
as ct_state=+trk+inv. But because the connection info is missing,
there is also no zone info to match against even though the packet
is tracked.

This series fixes that, by passing the last executed zone by act_ct.
The zone info is passed along from act_ct to the ct flow dissector
(used by flower to extract zone info) and to ovs, the same way as post_ct
is passed, via qdisc layer skb cb to dissector, and via skb extension
to OVS.

Since adding any more data to qdisc skb cb, there will be no room 
for BPF skb cb to extend it and stay under skb->cb size, this series
moves the tc related info from within qdisc skb cb to a tc specific cb
that also extends it.

---
Changelog:
v3->v2:
  Moved tc skb cb details from dissector back to flower
v1->v2:
  Cover letter wording
  Added blamed CCs

Paul Blakey (3):
  net/sched: Extend qdisc control block with tc control block
  net/sched: flow_dissector: Fix matching on zone id for invalid conns
  net: openvswitch: Fix matching zone id for invalid conns arriving from tc

 include/linux/skbuff.h|  3 ++-
 include/net/pkt_sched.h   | 16 
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/core/flow_dissector.c |  3 ++-
 net/openvswitch/flow.c|  8 +++-
 net/sched/act_ct.c| 15 ---
 net/sched/cls_api.c   |  7 +--
 net/sched/cls_flower.c|  6 --
 net/sched/sch_frag.c  |  3 ++-
 10 files changed, 50 insertions(+), 21 deletions(-)

-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 1/3] net/sched: Extend qdisc control block with tc control block

2021-12-14 Thread Paul Blakey via dev
BPF layer extends the qdisc control block via struct bpf_skb_data_end
and because of that there is no more room to add variables to the
qdisc layer control block without going over the skb->cb size.

Extend the qdisc control block with a tc control block,
and move all tc related variables to there as a pre-step for
extending the tc control block with additional members.

Signed-off-by: Paul Blakey 
---
 include/net/pkt_sched.h   | 15 +++
 include/net/sch_generic.h |  2 --
 net/core/dev.c|  8 
 net/sched/act_ct.c| 14 +++---
 net/sched/cls_api.c   |  6 --
 net/sched/cls_flower.c|  3 ++-
 net/sched/sch_frag.c  |  3 ++-
 7 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index bf79f3a890af..05f18e81f3e8 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -193,4 +193,19 @@ static inline void skb_txtime_consumed(struct sk_buff *skb)
skb->tstamp = ktime_set(0, 0);
 }
 
+struct tc_skb_cb {
+   struct qdisc_skb_cb qdisc_cb;
+
+   u16 mru;
+   bool post_ct;
+};
+
+static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
+{
+   struct tc_skb_cb *cb = (struct tc_skb_cb *)skb->cb;
+
+   BUILD_BUG_ON(sizeof(*cb) > sizeof_field(struct sk_buff, cb));
+   return cb;
+}
+
 #endif
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 22179b2fda72..c70e6d2b2fdd 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -447,8 +447,6 @@ struct qdisc_skb_cb {
};
 #define QDISC_CB_PRIV_LEN 20
unsigned char   data[QDISC_CB_PRIV_LEN];
-   u16 mru;
-   boolpost_ct;
 };
 
 typedef void tcf_chain_head_change_t(struct tcf_proto *tp_head, void *priv);
diff --git a/net/core/dev.c b/net/core/dev.c
index 2a352e668d10..c4708e2487fb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3941,8 +3941,8 @@ sch_handle_egress(struct sk_buff *skb, int *ret, struct 
net_device *dev)
return skb;
 
/* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
switch (tcf_classify(skb, miniq->block, miniq->filter_list, &cl_res, 
false)) {
@@ -5103,8 +5103,8 @@ sch_handle_ingress(struct sk_buff *skb, struct 
packet_type **pt_prev, int *ret,
}
 
qdisc_skb_cb(skb)->pkt_len = skb->len;
-   qdisc_skb_cb(skb)->mru = 0;
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_skb_cb(skb)->mru = 0;
+   tc_skb_cb(skb)->post_ct = false;
skb->tc_at_ingress = 1;
mini_qdisc_bstats_cpu_update(miniq, skb);
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 90866ae45573..98e248b9c0b1 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -690,10 +690,10 @@ static int tcf_ct_handle_fragments(struct net *net, 
struct sk_buff *skb,
   u8 family, u16 zone, bool *defrag)
 {
enum ip_conntrack_info ctinfo;
-   struct qdisc_skb_cb cb;
struct nf_conn *ct;
int err = 0;
bool frag;
+   u16 mru;
 
/* Previously seen (loopback)? Ignore. */
ct = nf_ct_get(skb, &ctinfo);
@@ -708,7 +708,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
return err;
 
skb_get(skb);
-   cb = *qdisc_skb_cb(skb);
+   mru = tc_skb_cb(skb)->mru;
 
if (family == NFPROTO_IPV4) {
enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
@@ -722,7 +722,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IPCB(skb)->frag_max_size;
+   mru = IPCB(skb)->frag_max_size;
}
} else { /* NFPROTO_IPV6 */
 #if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
@@ -735,7 +735,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
 
if (!err) {
*defrag = true;
-   cb.mru = IP6CB(skb)->frag_max_size;
+   mru = IP6CB(skb)->frag_max_size;
}
 #else
err = -EOPNOTSUPP;
@@ -744,7 +744,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct 
sk_buff *skb,
}
 
if (err != -EINPROGRESS)
-   *qdisc_skb_cb(skb) = cb;
+   tc_skb_cb(skb)->mru = mru;
skb_clear_hash(skb);
skb->ignore_df = 1;
return err;
@@ -963,7 +963,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
tcf_action_update_bstats(&c->common, skb);
 
if (clear) {
-   qdisc_skb_cb(skb)->post_ct = false;
+   tc_

[ovs-dev] [PATCH net v3 3/3] net: openvswitch: Fix matching zone id for invalid conns arriving from tc

2021-12-14 Thread Paul Blakey via dev
Zone id is not restored if we passed ct and ct rejected the connection,
as there is no ct info on the skb.

Save the zone from tc skb cb to tc skb extension and pass it on to
ovs, use that info to restore the zone id for invalid connections.

Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do 
conntrack in act_ct")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h | 1 +
 net/openvswitch/flow.c | 8 +++-
 net/sched/cls_api.c| 1 +
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 2ecf8cfd2223..4507d77d6941 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -286,6 +286,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
__u32 chain;
__u16 mru;
+   __u16 zone;
bool post_ct;
 };
 #endif
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index 9713035b89e3..6d262d9aa10e 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -34,6 +34,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include "conntrack.h"
 #include "datapath.h"
@@ -860,6 +861,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
bool post_ct = false;
int res, err;
+   u16 zone = 0;
 
/* Extract metadata from packet. */
if (tun_info) {
@@ -898,6 +900,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
key->recirc_id = tc_ext ? tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
+   zone = post_ct ? tc_ext->zone : 0;
} else {
key->recirc_id = 0;
}
@@ -906,8 +909,11 @@ int ovs_flow_key_extract(const struct ip_tunnel_info 
*tun_info,
 #endif
 
err = key_extract(skb, key);
-   if (!err)
+   if (!err) {
ovs_ct_fill_key(skb, key, post_ct);   /* Must be after 
key_extract(). */
+   if (post_ct && !skb_get_nfct(skb))
+   key->ct_zone = zone;
+   }
return err;
 }
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index a5050999d607..bede2bd47065 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1625,6 +1625,7 @@ int tcf_classify(struct sk_buff *skb,
ext->chain = last_executed_chain;
ext->mru = cb->mru;
ext->post_ct = cb->post_ct;
+   ext->zone = cb->zone;
}
 
return ret;
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH net v3 2/3] net/sched: flow_dissector: Fix matching on zone id for invalid conns

2021-12-14 Thread Paul Blakey via dev
If ct rejects a flow, it removes the conntrack info from the skb.
act_ct sets the post_ct variable so the dissector will see this case
as an +tracked +invalid state, but the zone id is lost with the
conntrack info.

To restore the zone id on such cases, set the last executed zone,
via the tc control block, when passing ct, and read it back in the
dissector if there is no ct info on the skb (invalid connection).

Fixes: 7baf2429a1a9 ("net/sched: cls_flower add CT_FLAGS_INVALID flag support")
Signed-off-by: Paul Blakey 
---
 include/linux/skbuff.h| 2 +-
 include/net/pkt_sched.h   | 1 +
 net/core/flow_dissector.c | 3 ++-
 net/sched/act_ct.c| 1 +
 net/sched/cls_flower.c| 3 ++-
 5 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c8cb7e697d47..2ecf8cfd2223 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1380,7 +1380,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container,
u16 *ctinfo_map, size_t mapsize,
-   bool post_ct);
+   bool post_ct, u16 zone);
 void
 skb_flow_dissect_tunnel_info(const struct sk_buff *skb,
 struct flow_dissector *flow_dissector,
diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h
index 05f18e81f3e8..9e71691c491b 100644
--- a/include/net/pkt_sched.h
+++ b/include/net/pkt_sched.h
@@ -198,6 +198,7 @@ struct tc_skb_cb {
 
u16 mru;
bool post_ct;
+   u16 zone; /* Only valid if post_ct = true */
 };
 
 static inline struct tc_skb_cb *tc_skb_cb(const struct sk_buff *skb)
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 3255f57f5131..1b094c481f1d 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -238,7 +238,7 @@ void
 skb_flow_dissect_ct(const struct sk_buff *skb,
struct flow_dissector *flow_dissector,
void *target_container, u16 *ctinfo_map,
-   size_t mapsize, bool post_ct)
+   size_t mapsize, bool post_ct, u16 zone)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
struct flow_dissector_key_ct *key;
@@ -260,6 +260,7 @@ skb_flow_dissect_ct(const struct sk_buff *skb,
if (!ct) {
key->ct_state = TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
TCA_FLOWER_KEY_CT_FLAGS_INVALID;
+   key->ct_zone = zone;
return;
}
 
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 98e248b9c0b1..ab3591408419 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1049,6 +1049,7 @@ static int tcf_ct_act(struct sk_buff *skb, const struct 
tc_action *a,
skb_push_rcsum(skb, nh_ofs);
 
tc_skb_cb(skb)->post_ct = true;
+   tc_skb_cb(skb)->zone = p->zone;
 out_clear:
if (defrag)
qdisc_skb_cb(skb)->pkt_len = skb->len;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 9782b93db1b3..ef54ed395874 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -311,6 +311,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
 {
struct cls_fl_head *head = rcu_dereference_bh(tp->root);
bool post_ct = tc_skb_cb(skb)->post_ct;
+   u16 zone = tc_skb_cb(skb)->zone;
struct fl_flow_key skb_key;
struct fl_flow_mask *mask;
struct cls_fl_filter *f;
@@ -328,7 +329,7 @@ static int fl_classify(struct sk_buff *skb, const struct 
tcf_proto *tp,
skb_flow_dissect_ct(skb, &mask->dissector, &skb_key,
fl_ct_info_to_flower_map,
ARRAY_SIZE(fl_ct_info_to_flower_map),
-   post_ct);
+   post_ct, zone);
skb_flow_dissect_hash(skb, &mask->dissector, &skb_key);
skb_flow_dissect(skb, &mask->dissector, &skb_key,
 FLOW_DISSECTOR_F_STOP_BEFORE_ENCAP);
-- 
2.30.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH] system-dpdk: Fix race in vhost-user tests.

2021-12-14 Thread David Marchand
Waiting only on the vhost user port to be ready is not enough since a
tap is also initialized by testpmd and is used to inject/receive packets
in/from the kernel.
Wait on the tap link status.

Fixes: 18db7ec5eb83 ("system-dpdk: Improve vhost-user ping tests reliability.")
Signed-off-by: David Marchand 
---
 tests/system-dpdk.at | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 0e0b340d74..e39fc0d938 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -123,6 +123,7 @@ tail -f /dev/null | dpdk-testpmd --socket-mem="$(cat 
NUMA_NODE)" --no-pci\
--single-file-segments -- -a 
>$OVS_RUNDIR/testpmd-dpdkvhostuser0.log 2>&1 &
 
 OVS_WAIT_UNTIL([grep "virtio is now ready for processing" ovs-vswitchd.log])
+OVS_WAIT_UNTIL([ip link show dev tap0 | grep -qw LOWER_UP])
 
 dnl Move the tap devices to the namespaces
 AT_CHECK([ps aux | grep testpmd], [], [stdout], [stderr])
@@ -199,6 +200,7 @@ tail -f /dev/null | dpdk-testpmd --socket-mem="$(cat 
NUMA_NODE)" --no-pci\
--single-file-segments -- -a 
>$OVS_RUNDIR/testpmd-dpdkvhostuserclient0.log 2>&1 &
 
 OVS_WAIT_UNTIL([grep "virtio is now ready for processing" ovs-vswitchd.log])
+OVS_WAIT_UNTIL([ip link show dev tap0 | grep -qw LOWER_UP])
 
 dnl Move the tap devices to the namespaces
 AT_CHECK([ps aux | grep testpmd], [], [stdout], [stderr])
-- 
2.23.0

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] system-dpdk: Fix race in vhost-user tests.

2021-12-14 Thread Maxime Coquelin




On 12/14/21 21:42, David Marchand wrote:

Waiting only on the vhost user port to be ready is not enough since a
tap is also initialized by testpmd and is used to inject/receive packets
in/from the kernel.
Wait on the tap link status.

Fixes: 18db7ec5eb83 ("system-dpdk: Improve vhost-user ping tests reliability.")
Signed-off-by: David Marchand 
---
  tests/system-dpdk.at | 2 ++
  1 file changed, 2 insertions(+)

diff --git a/tests/system-dpdk.at b/tests/system-dpdk.at
index 0e0b340d74..e39fc0d938 100644
--- a/tests/system-dpdk.at
+++ b/tests/system-dpdk.at
@@ -123,6 +123,7 @@ tail -f /dev/null | dpdk-testpmd --socket-mem="$(cat 
NUMA_NODE)" --no-pci\
 --single-file-segments -- -a >$OVS_RUNDIR/testpmd-dpdkvhostuser0.log 
2>&1 &
  
  OVS_WAIT_UNTIL([grep "virtio is now ready for processing" ovs-vswitchd.log])

+OVS_WAIT_UNTIL([ip link show dev tap0 | grep -qw LOWER_UP])
  
  dnl Move the tap devices to the namespaces

  AT_CHECK([ps aux | grep testpmd], [], [stdout], [stderr])
@@ -199,6 +200,7 @@ tail -f /dev/null | dpdk-testpmd --socket-mem="$(cat 
NUMA_NODE)" --no-pci\
 --single-file-segments -- -a >$OVS_RUNDIR/testpmd-dpdkvhostuserclient0.log 
2>&1 &
  
  OVS_WAIT_UNTIL([grep "virtio is now ready for processing" ovs-vswitchd.log])

+OVS_WAIT_UNTIL([ip link show dev tap0 | grep -qw LOWER_UP])
  
  dnl Move the tap devices to the namespaces

  AT_CHECK([ps aux | grep testpmd], [], [stdout], [stderr])



Thanks for the fix.

Reviewed-by: Maxime Coquelin 

Thanks,
Maxime

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v5] dpcls: Change info-get function to fetch dpcls usage stats.

2021-12-14 Thread Kumar Amber
Modified the dplcs info-get command output to include
the count for different dpcls implementations.

$ovs-appctl dpif-netdev/subtable-lookup-prio-get

Available dpcls implementations:
  autovalidator (Use count: 1, Priority: 5)
  generic (Use count: 0, Priority: 1)
  avx512_gather (Use count: 0, Priority: 3)

Test case to verify changes:
1021: PMD - dpcls configuration ok

Signed-off-by: Kumar Amber 
Signed-off-by: Harry van Haaren 
Signed-off-by: Eelco Chaudron 
Co-authored-by: Harry van Haaren 
Co-authored-by: Eelco Chaudron 
---
v5:
- change the info-incr and decr APIs.
- Reduce the complexity of dpcls stats APIs.
v4:
- Fix comments on the patch.
- Change API from an overloaded method of counting, to returning the
  old and new subtable structs. This allows the caller to identify the
  modified subtable implementations, and update the statistics accordingly.
v3:
- Fix comments on the patch.
- Function API remains same, see discussion on OVS ML here:
  "https://mail.openvswitch.org/pipermail/ovs-dev/2021-October/388737.html";
v2:
- Dependency merged rebased to master.

---
---
 Documentation/topics/dpdk/bridge.rst | 16 ++---
 lib/dpif-netdev-lookup.c | 98 +---
 lib/dpif-netdev-lookup.h | 18 -
 lib/dpif-netdev-private-dpcls.h  |  1 +
 lib/dpif-netdev.c| 39 ++-
 tests/pmd.at | 16 ++---
 6 files changed, 129 insertions(+), 59 deletions(-)

diff --git a/Documentation/topics/dpdk/bridge.rst 
b/Documentation/topics/dpdk/bridge.rst
index f645b9ade..63a54da1c 100644
--- a/Documentation/topics/dpdk/bridge.rst
+++ b/Documentation/topics/dpdk/bridge.rst
@@ -156,10 +156,10 @@ OVS provides multiple implementations of dpcls. The 
following command enables
 the user to check what implementations are available in a running instance ::
 
 $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-0 : avx512_gather
+Available dpcls implementations:
+autovalidator (Use count: 1, Priority: 5)
+generic (Use count: 0, Priority: 1)
+avx512_gather (Use count: 0, Priority: 3)
 
 To set the priority of a lookup function, run the ``prio-set`` command ::
 
@@ -172,10 +172,10 @@ function due to the command being run. To verify the 
prioritization, re-run the
 get command, note the updated priority of the ``avx512_gather`` function ::
 
 $ ovs-appctl dpif-netdev/subtable-lookup-prio-get
-Available lookup functions (priority : name)
-0 : autovalidator
-1 : generic
-5 : avx512_gather
+Available dpcls implementations:
+autovalidator (Use count: 0, Priority: 0)
+generic (Use count: 0, Priority: 0)
+avx512_gather (Use count: 1, Priority: 5)
 
 If two lookup functions have the same priority, the first one in the list is
 chosen, and the 2nd occurance of that priority is not used. Put in logical
diff --git a/lib/dpif-netdev-lookup.c b/lib/dpif-netdev-lookup.c
index bd0a99abe..e641e4028 100644
--- a/lib/dpif-netdev-lookup.c
+++ b/lib/dpif-netdev-lookup.c
@@ -36,18 +36,21 @@ static struct dpcls_subtable_lookup_info_t 
subtable_lookups[] = {
 { .prio = 0,
 #endif
   .probe = dpcls_subtable_autovalidator_probe,
-  .name = "autovalidator", },
+  .name = "autovalidator",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 /* The default scalar C code implementation. */
 { .prio = 1,
   .probe = dpcls_subtable_generic_probe,
-  .name = "generic", },
+  .name = "generic",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 
 #if (__x86_64__ && HAVE_AVX512F && HAVE_LD_AVX512_GOOD && __SSE4_2__)
 /* Only available on x86_64 bit builds with SSE 4.2 used for OVS core. */
 { .prio = 0,
   .probe = dpcls_subtable_avx512_gather_probe,
-  .name = "avx512_gather", },
+  .name = "avx512_gather",
+  .usage_cnt = ATOMIC_COUNT_INIT(0), },
 #else
 /* Disabling AVX512 at compile time, as compile time requirements not met.
  * This could be due to a number of reasons:
@@ -64,7 +67,7 @@ static struct dpcls_subtable_lookup_info_t subtable_lookups[] 
= {
 #endif
 };
 
-int32_t
+int
 dpcls_subtable_lookup_info_get(struct dpcls_subtable_lookup_info_t **out_ptr)
 {
 if (out_ptr == NULL) {
@@ -76,7 +79,7 @@ dpcls_subtable_lookup_info_get(struct 
dpcls_subtable_lookup_info_t **out_ptr)
 }
 
 /* sets the priority of the lookup function with "name". */
-int32_t
+int
 dpcls_subtable_set_prio(const char *name, uint8_t priority)
 {
 for (int i = 0; i < ARRAY_SIZE(subtable_lookups); i++) {
@@ -93,32 +96,81 @@ dpcls_subtable_set_prio(const char *name, uint8_t priority)
 }
 
 dpcls_subtable_lookup_func
-dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count)
+dpcls_subtable_get_best_impl(uint32_t u0_bit_count, uint32_t u1_bit_count,
+