Re: [ovs-dev] [PATCH] appveyor: Fix caching of OpenSSL installer.

2024-06-14 Thread Ilya Maximets
On 6/11/24 00:09, Alin Serdean wrote:
> Acked-by: Alin-Gabriel Serdean 

Thanks!  Applied.

Best regards, Ilya Maximets.

> 
> On Mon, Jun 10, 2024 at 11:18 PM Ilya Maximets  > wrote:
> 
> Apparently, if the cache dependency is specified, the cache folder
> is not checked at the end of a build and so the cache is never
> updated unless we change appveyor.yml.  This makes the cache to not
> actually work, because on each build we discover that the installer
> is outdated, download the new one and it is not uploaded to the cache,
> so it is still outdated on the next build.
> 
> Removing the dependency to get a normal cache behavior.  We're
> manually comparing the hash of the cached binary with the most
> latest one, so we will still catch any OpenSSL updates, but now
> we will also upload the updated cache back.
> 
> Fixes: 9d8208484a35 ("appveyor: Build with OpenSSL 3.0.")
> Reported-at: 
> https://help.appveyor.com/discussions/problems/36144-cache-reports-up-to-date-while-it-is-not
> Signed-off-by: Ilya Maximets 
> ---
>  appveyor.yml | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/appveyor.yml b/appveyor.yml
> index d11e46399..d0293b211 100644
> --- a/appveyor.yml
> +++ b/appveyor.yml
> @@ -15,7 +15,7 @@ init:
>                                -Value "C:\Python312-x64\python.exe"
> 
>  cache:
> -- C:\ovs-build-downloads -> appveyor.yml
> +- C:\ovs-build-downloads
> 
>  install:
>  - ps: |
> -- 
> 2.45.0
> 
> ___
> dev mailing list
> d...@openvswitch.org 
> https://mail.openvswitch.org/mailman/listinfo/ovs-dev 
> 
> 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 9/9] selftests: openvswitch: add emit_sample test

2024-06-14 Thread Aaron Conole
Adrian Moreno  writes:

> Add a test to verify sampling packets via psample works.
>
> In order to do that, create a subcommand in ovs-dpctl.py to listen to
> on the psample multicast group and print samples.
>
> In order to also test simultaneous sFlow and psample actions and
> packet truncation, add missing parsing support for "userspace" and
> "trunc" actions.

Maybe split that into a separate patch.  This has a bugfix and 3
features being pushed in.  I know it's already getting long as a series,
so maybe it's okay to fold the userspace attribute bugfix with the parse
support (since it wasn't really usable before).

> Signed-off-by: Adrian Moreno 
> ---
>  .../selftests/net/openvswitch/openvswitch.sh  |  99 +++-
>  .../selftests/net/openvswitch/ovs-dpctl.py| 112 +-
>  2 files changed, 204 insertions(+), 7 deletions(-)
>
> diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh 
> b/tools/testing/selftests/net/openvswitch/openvswitch.sh
> index 5cae53543849..f6e0ae3f6424 100755
> --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
> +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
> @@ -20,7 +20,8 @@ tests="
>   nat_related_v4  ip4-nat-related: ICMP related 
> matches work with SNAT
>   netlink_checks  ovsnl: validate netlink attrs 
> and settings
>   upcall_interfaces   ovs: test the upcall interfaces
> - drop_reason drop: test drop reasons are 
> emitted"
> + drop_reason drop: test drop reasons are 
> emitted
> + emit_sample emit_sample: Sampling packets 
> with psample"
>  
>  info() {
>  [ $VERBOSE = 0 ] || echo $*
> @@ -170,6 +171,19 @@ ovs_drop_reason_count()
>   return `echo "$perf_output" | grep "$pattern" | wc -l`
>  }
>  
> +ovs_test_flow_fails () {
> + ERR_MSG="Flow actions may not be safe on all matching packets"
> +
> + PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
> + ovs_add_flow $@ &> /dev/null $@ && return 1
> + POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
> +
> + if [ "$PRE_TEST" == "$POST_TEST" ]; then
> + return 1
> + fi
> + return 0
> +}
> +
>  usage() {
>   echo
>   echo "$0 [OPTIONS] [TEST]..."
> @@ -184,6 +198,89 @@ usage() {
>   exit 1
>  }
>  
> +
> +# emit_sample test
> +# - use emit_sample to observe packets
> +test_emit_sample() {
> + sbx_add "test_emit_sample" || return $?
> +
> + # Add a datapath with per-vport dispatching.
> + ovs_add_dp "test_emit_sample" emit_sample -V 2:1 || return 1
> +
> + info "create namespaces"
> + ovs_add_netns_and_veths "test_emit_sample" "emit_sample" \
> + client c0 c1 172.31.110.10/24 -u || return 1
> + ovs_add_netns_and_veths "test_emit_sample" "emit_sample" \
> + server s0 s1 172.31.110.20/24 -u || return 1
> +
> + # Check if emit_sample actions can be configured.
> + ovs_add_flow "test_emit_sample" emit_sample \
> + 'in_port(1),eth(),eth_type(0x0806),arp()' 'emit_sample(group=1)'
> + if [ $? == 1 ]; then
> + info "no support for emit_sample - skipping"
> + ovs_exit_sig
> + return $ksft_skip
> + fi
> +
> + ovs_del_flows "test_emit_sample" emit_sample
> +
> + # Allow ARP
> + ovs_add_flow "test_emit_sample" emit_sample \
> + 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
> + ovs_add_flow "test_emit_sample" emit_sample \
> + 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
> +
> + # Test action verification.
> + OLDIFS=$IFS
> + IFS='*'
> + min_key='in_port(1),eth(),eth_type(0x0800),ipv4()'
> + for testcase in \
> + "cookie to 
> large"*"emit_sample(group=1,cookie=1615141312111009080706050403020100)" \
> + "no group with cookie"*"emit_sample(cookie=abcd)" \
> + "no group"*"sample()";
> + do
> + set -- $testcase;
> + ovs_test_flow_fails "test_emit_sample" emit_sample $min_key $2
> + if [ $? == 1 ]; then
> + info "failed - $1"
> + return 1
> + fi
> + done
> + IFS=$OLDIFS
> +
> + # Sample first 14 bytes of all traffic.
> + ovs_add_flow "test_emit_sample" emit_sample \
> + 
> "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" 
> "trunc(14),emit_sample(group=1,cookie=c0ffee),2"
> +
> + # Sample all traffic. In this case, use a sample() action with both
> + # emit_sample and an upcall emulating simultaneous local sampling and
> + # sFlow / IPFIX.
> + nlpid=$(grep -E "listening on upcall packet handler" $ovs_dir/s0.out | 
> cut -d ":" -f 2 | tr -d ' ')
> + ovs_add_flow "test_emit_sample" emit_sample \
> + 
> "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 
> "sample

Re: [ovs-dev] [PATCH net-next v2 6/9] net: openvswitch: store sampling probability in cb.

2024-06-14 Thread Aaron Conole
Adrian Moreno  writes:

> The behavior of actions might not be the exact same if they are being
> executed inside a nested sample action. Store the probability of the
> parent sample action in the skb's cb area.

What does that mean?

> Use the probability in emit_sample to pass it down to psample.
>
> Signed-off-by: Adrian Moreno 
> ---
>  include/uapi/linux/openvswitch.h |  3 ++-
>  net/openvswitch/actions.c| 25 ++---
>  net/openvswitch/datapath.h   |  3 +++
>  net/openvswitch/vport.c  |  1 +
>  4 files changed, 28 insertions(+), 4 deletions(-)
>
> diff --git a/include/uapi/linux/openvswitch.h 
> b/include/uapi/linux/openvswitch.h
> index a0e9dde0584a..9d675725fa2b 100644
> --- a/include/uapi/linux/openvswitch.h
> +++ b/include/uapi/linux/openvswitch.h
> @@ -649,7 +649,8 @@ enum ovs_flow_attr {
>   * Actions are passed as nested attributes.
>   *
>   * Executes the specified actions with the given probability on a per-packet
> - * basis.
> + * basis. Nested actions will be able to access the probability value of the
> + * parent @OVS_ACTION_ATTR_SAMPLE.
>   */
>  enum ovs_sample_attr {
>   OVS_SAMPLE_ATTR_UNSPEC,
> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
> index 3b4dba0ded59..33f6d93ba5e4 100644
> --- a/net/openvswitch/actions.c
> +++ b/net/openvswitch/actions.c
> @@ -1048,12 +1048,15 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
>   struct nlattr *sample_arg;
>   int rem = nla_len(attr);
>   const struct sample_arg *arg;
> + u32 init_probability;
>   bool clone_flow_key;
> + int err;
>  
>   /* The first action is always 'OVS_SAMPLE_ATTR_ARG'. */
>   sample_arg = nla_data(attr);
>   arg = nla_data(sample_arg);
>   actions = nla_next(sample_arg, &rem);
> + init_probability = OVS_CB(skb)->probability;
>  
>   if ((arg->probability != U32_MAX) &&
>   (!arg->probability || get_random_u32() > arg->probability)) {
> @@ -1062,9 +1065,21 @@ static int sample(struct datapath *dp, struct sk_buff 
> *skb,
>   return 0;
>   }
>  
> + if (init_probability) {
> + OVS_CB(skb)->probability = ((u64)OVS_CB(skb)->probability *
> + arg->probability / U32_MAX);
> + } else {
> + OVS_CB(skb)->probability = arg->probability;
> + }
> +

I'm confused by this.  Eventually, integer arithmetic will practically
guarantee that nested sample() calls will go to 0.  So eventually, the
test above will be impossible to meet mathematically.

OTOH, you could argue that a 1% of 50% is low anyway, but it still would
have a positive probability count, and still be possible for
get_random_u32() call to match.

I'm not sure about this particular change.  Why do we need it?

>   clone_flow_key = !arg->exec;
> - return clone_execute(dp, skb, key, 0, actions, rem, last,
> -  clone_flow_key);
> + err = clone_execute(dp, skb, key, 0, actions, rem, last,
> + clone_flow_key);
> +
> + if (!last)

Is this right?  Don't we only want to set the probability on the last
action?  Should the test be 'if (last)'?

> + OVS_CB(skb)->probability = init_probability;
> +
> + return err;
>  }
>  
>  /* When 'last' is true, clone() should always consume the 'skb'.
> @@ -1313,6 +1328,7 @@ static int execute_emit_sample(struct datapath *dp, 
> struct sk_buff *skb,
>   struct psample_metadata md = {};
>   struct vport *input_vport;
>   const struct nlattr *a;
> + u32 rate;
>   int rem;
>  
>   for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
> @@ -1337,8 +1353,11 @@ static int execute_emit_sample(struct datapath *dp, 
> struct sk_buff *skb,
>  
>   md.in_ifindex = input_vport->dev->ifindex;
>   md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
> + md.rate_as_probability = 1;
> +
> + rate = OVS_CB(skb)->probability ? OVS_CB(skb)->probability : U32_MAX;
>  
> - psample_sample_packet(&psample_group, skb, 0, &md);
> + psample_sample_packet(&psample_group, skb, rate, &md);
>  #endif
>  
>   return 0;
> diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
> index 0cd29971a907..9ca6231ea647 100644
> --- a/net/openvswitch/datapath.h
> +++ b/net/openvswitch/datapath.h
> @@ -115,12 +115,15 @@ struct datapath {
>   * fragmented.
>   * @acts_origlen: The netlink size of the flow actions applied to this skb.
>   * @cutlen: The number of bytes from the packet end to be removed.
> + * @probability: The sampling probability that was applied to this skb; 0 
> means
> + * no sampling has occurred; U32_MAX means 100% probability.
>   */
>  struct ovs_skb_cb {
>   struct vport*input_vport;
>   u16 mru;
>   u16 acts_origlen;
>   u32 cutlen;
> + u32 probability;
>  };
>  #define OVS_CB(skb) ((struct

Re: [ovs-dev] [PATCH v3] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread Ilya Maximets
On 6/14/24 17:08, David Marchand wrote:
> Querying link status may get delayed for an undeterministic (long) time
> with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
> kernel API and getting stuck on the kernel RTNL lock while some other
> operation is in progress under this lock.
> 
> One impact for long link status query is that it is called under the bond
> lock taken in write mode periodically in bond_run().
> In parallel, datapath threads may block requesting to read bonding related
> info (like for example in bond_check_admissibility()).
> 
> The LSC interrupt mode is available with many DPDK drivers and is used by
> default with testpmd.
> 
> It seems safe enough to switch on this feature by default in OVS.
> We keep the per interface option to disable this feature in case of an
> unforeseen bug.
> 
> Signed-off-by: David Marchand 
> Reviewed-by: Robin Jarry 
> Acked-by: Mike Pattrick 
> ---
> Changes since v2:
> - fixed typo in NEWS,
> 
> Changes since v1:
> - (early) fail when interrupt lsc is requested by user but not supported
>   by the driver,
> - otherwise, log a debug message if user did not request interrupt mode,
> 
> ---
>  Documentation/topics/dpdk/phy.rst |  4 ++--
>  NEWS  |  3 +++
>  lib/netdev-dpdk.c | 13 -
>  vswitchd/vswitch.xml  |  8 
>  4 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/Documentation/topics/dpdk/phy.rst 
> b/Documentation/topics/dpdk/phy.rst
> index efd168cba8..eefc25613d 100644
> --- a/Documentation/topics/dpdk/phy.rst
> +++ b/Documentation/topics/dpdk/phy.rst
> @@ -546,8 +546,8 @@ the firmware every time to fulfil this request.
>  
>  Note that not all PMD drivers support LSC interrupts.
>  
> -The default configuration is polling mode. To set interrupt mode, option
> -``dpdk-lsc-interrupt`` has to be set to ``true``.
> +The default configuration is interrupt mode. To set polling mode, option
> +``dpdk-lsc-interrupt`` has to be set to ``false``.
>  
>  Command to set interrupt mode for a specific interface::
>  $ ovs-vsctl set interface  options:dpdk-lsc-interrupt=true
> diff --git a/NEWS b/NEWS
> index 5ae0108d55..d05f2d0f89 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> + * Link status changes are now handled via interrupt mode if the DPDK
> +   driver supports it.  It is possible to revert to polling mode by 
> setting
> +   per interface 'options:dpdk-lsc-interrupt' to 'false'.
>  
>  
>  v3.3.0 - 16 Feb 2024
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0fa37d5145..a260bc8485 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2397,7 +2397,18 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
> struct smap *args,
>  }
>  }
>  
> -lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
> +lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", true);
> +if (lsc_interrupt_mode && !(*info.dev_flags & RTE_ETH_DEV_INTR_LSC)) {
> +if (smap_get(args, "dpdk-lsc-interrupt")) {
> +VLOG_ERR("interface '%s': link status interrupt is not 
> supported.",
> + netdev_get_name(netdev));

Since we're exiting with an error set, the message should be buffered
into errp instead, so it can be visible in the database record and
returned as a result of the ovs-vsctl.

Also, we're using WARN level for all other configuration issues, so we
should do that here as well.  ERR is usually some sort of internal error.
And we're usually just using "%s: ..." and not "interface '%s': ...".

> +err = EINVAL;
> +goto out;
> +}
> +VLOG_DBG("interface '%s': not enabling link status interrupt.",
> + netdev_get_name(netdev));
> +lsc_interrupt_mode = false;
> +}
>  if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
>  dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
>  netdev_request_reconfigure(netdev);
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 8a1b607d71..e3afb78a4e 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -4647,12 +4647,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
> type=patch options:peer=p1 \
>type='{"type": "boolean"}'>
>  
> -  Set this value to true to configure interrupt mode for
> -  Link State Change (LSC) detection instead of poll mode for the DPDK
> -  interface.
> +  Set this value to false to configure poll mode for
> +  Link State Change (LSC) detection instead of interrupt mode for the
> +  DPDK interface.
>  
>  
> -  If this value is not set, poll mode is configured.
> +  If this value is not set, interrupt mode is configured.
>

Re: [ovs-dev] [PATCH net-next v2 7/9] net: openvswitch: do not notify drops inside sample

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:41PM +0200, Adrian Moreno wrote:
> The OVS_ACTION_ATTR_SAMPLE action is, in essence,
> observability-oriented.
> 
> Apart from some corner case in which it's used a replacement of clone()
> for old kernels, it's really only used for sFlow, IPFIX and now,
> local emit_sample.
> 
> With this in mind, it doesn't make much sense to report
> OVS_DROP_LAST_ACTION inside sample actions.
> 
> For instance, if the flow:
> 
>   actions:sample(..,emit_sample(..)),2
> 
> triggers a OVS_DROP_LAST_ACTION skb drop event, it would be extremely
> confusing for users since the packet did reach its destination.
> 
> This patch makes internal action execution silently consume the skb
> instead of notifying a drop for this case.
> 
> Unfortunately, this patch does not remove all potential sources of
> confusion since, if the sample action itself is the last action, e.g:
> 
> actions:sample(..,emit_sample(..))
> 
> we actually _should_ generate a OVS_DROP_LAST_ACTION event, but we aren't.
> 
> Sadly, this case is difficult to solve without breaking the
> optimization by which the skb is not cloned on last sample actions.
> But, given explicit drop actions are now supported, OVS can just add one
> after the last sample() and rewrite the flow as:
> 
> actions:sample(..,emit_sample(..)),drop
> 
> Signed-off-by: Adrian Moreno 

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 3/9] net: psample: skip packet copy if no listeners

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:37PM +0200, Adrian Moreno wrote:
> If nobody is listening on the multicast group, generating the sample,
> which involves copying packet data, seems completely unnecessary.
> 
> Return fast in this case.
> 
> Signed-off-by: Adrian Moreno 

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 2/9] net: sched: act_sample: add action cookie to sample

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:36PM +0200, Adrian Moreno wrote:
> If the action has a user_cookie, pass it along to the sample so it can
> be easily identified.
> 
> Signed-off-by: Adrian Moreno 

Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 1/9] net: psample: add user cookie

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:35PM +0200, Adrian Moreno wrote:
> Add a user cookie to the sample metadata so that sample emitters can
> provide more contextual information to samples.
> 
> If present, send the user cookie in a new attribute:
> PSAMPLE_ATTR_USER_COOKIE.
> 
> Signed-off-by: Adrian Moreno 


Reviewed-by: Simon Horman 

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 5/9] net: openvswitch: add emit_sample action

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:39PM +0200, Adrian Moreno wrote:
> Add support for a new action: emit_sample.
> 
> This action accepts a u32 group id and a variable-length cookie and uses
> the psample multicast group to make the packet available for
> observability.
> 
> The maximum length of the user-defined cookie is set to 16, same as
> tc_cookie, to discourage using cookies that will not be offloadable.
> 
> Signed-off-by: Adrian Moreno 

...

> diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c

...

> @@ -1299,6 +1304,46 @@ static int execute_dec_ttl(struct sk_buff *skb, struct 
> sw_flow_key *key)
>   return 0;
>  }
>  
> +static int execute_emit_sample(struct datapath *dp, struct sk_buff *skb,
> +const struct sw_flow_key *key,
> +const struct nlattr *attr)
> +{
> +#if IS_ENABLED(CONFIG_PSAMPLE)
> + struct psample_group psample_group = {};
> + struct psample_metadata md = {};
> + struct vport *input_vport;
> + const struct nlattr *a;
> + int rem;
> +
> + for (a = nla_data(attr), rem = nla_len(attr); rem > 0;
> +  a = nla_next(a, &rem)) {
> + switch (nla_type(a)) {
> + case OVS_EMIT_SAMPLE_ATTR_GROUP:
> + psample_group.group_num = nla_get_u32(a);
> + break;
> +
> + case OVS_EMIT_SAMPLE_ATTR_COOKIE:
> + md.user_cookie = nla_data(a);
> + md.user_cookie_len = nla_len(a);
> + break;
> + }
> + }
> +
> + psample_group.net = ovs_dp_get_net(dp);
> +
> + input_vport = ovs_vport_rcu(dp, key->phy.in_port);
> + if (!input_vport)
> + input_vport = ovs_vport_rcu(dp, OVSP_LOCAL);
> +
> + md.in_ifindex = input_vport->dev->ifindex;
> + md.trunc_size = skb->len - OVS_CB(skb)->cutlen;
> +
> + psample_sample_packet(&psample_group, skb, 0, &md);
> +#endif
> +
> + return 0;
> +}
> +
>  /* Execute a list of actions against 'skb'. */
>  static int do_execute_actions(struct datapath *dp, struct sk_buff *skb,
> struct sw_flow_key *key,
> @@ -1502,6 +1547,11 @@ static int do_execute_actions(struct datapath *dp, 
> struct sk_buff *skb,
>   ovs_kfree_skb_reason(skb, reason);
>   return 0;
>   }
> +
> + case OVS_ACTION_ATTR_EMIT_SAMPLE:
> + err = execute_emit_sample(dp, skb, key, a);
> + OVS_CB(skb)->cutlen = 0;
> + break;
>   }

Hi Adrian,

execute_emit_sample always returns 0, and it seems that err will always
be 0 when the code above is executed. So perhaps the return type
of execute_emit_sample could be changed to void and the code above be
updated not to set err.

Other than that, which I don't feel particularly strongly about,
this looks good to me.

...
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next v2 4/9] net: psample: allow using rate as probability

2024-06-14 Thread Simon Horman
On Mon, Jun 03, 2024 at 08:56:38PM +0200, Adrian Moreno wrote:
> Although not explicitly documented in the psample module itself, the
> definition of PSAMPLE_ATTR_SAMPLE_RATE seems inherited from act_sample.
> 
> Quoting tc-sample(8):
> "RATE of 100 will lead to an average of one sampled packet out of every
> 100 observed."
> 
> With this semantics, the rates that we can express with an unsigned
> 32-bits number are very unevenly distributed and concentrated towards
> "sampling few packets".
> For example, we can express a probability of 2.32E-8% but we
> cannot express anything between 100% and 50%.
> 
> For sampling applications that are capable of sampling a decent
> amount of packets, this sampling rate semantics is not very useful.
> 
> Add a new flag to the uAPI that indicates that the sampling rate is
> expressed in scaled probability, this is:
> - 0 is 0% probability, no packets get sampled.
> - U32_MAX is 100% probability, all packets get sampled.
> 
> Signed-off-by: Adrian Moreno 

Hi Adrian,

Would it be possible to add appropriate documentation for
rate - both the original ratio variant, and the new probability
variant - somewhere?

That aside, this looks good to me.

...
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH net-next 2/2] selftests: openvswitch: set value to nla flags

2024-06-14 Thread Aaron Conole
Adrián Moreno  writes:

> On Mon, Jun 03, 2024 at 03:02:46PM GMT, Aaron Conole wrote:
>> Adrian Moreno  writes:
>>
>> > Netlink flags, although they don't have payload at the netlink level,
>> > are represented as having a "True" value in pyroute2.
>> >
>> > Without it, trying to add a flow with a flag-type action (e.g: pop_vlan)
>> > fails with the following traceback:
>> >
>> > Traceback (most recent call last):
>> >   File "[...]/ovs-dpctl.py", line 2498, in 
>> > sys.exit(main(sys.argv))
>> >  ^^
>> >   File "[...]/ovs-dpctl.py", line 2487, in main
>> > ovsflow.add_flow(rep["dpifindex"], flow)
>> >   File "[...]/ovs-dpctl.py", line 2136, in add_flow
>> > reply = self.nlm_request(
>> > ^
>> >   File "[...]/pyroute2/netlink/nlsocket.py", line 822, in nlm_request
>> > return tuple(self._genlm_request(*argv, **kwarg))
>> >  ^^^
>> >   File "[...]/pyroute2/netlink/generic/__init__.py", line 126, in
>> > nlm_request
>> > return tuple(super().nlm_request(*argv, **kwarg))
>> >^^
>> >   File "[...]/pyroute2/netlink/nlsocket.py", line 1124, in nlm_request
>> > self.put(msg, msg_type, msg_flags, msg_seq=msg_seq)
>> >   File "[...]/pyroute2/netlink/nlsocket.py", line 389, in put
>> > self.sendto_gate(msg, addr)
>> >   File "[...]/pyroute2/netlink/nlsocket.py", line 1056, in sendto_gate
>> > msg.encode()
>> >   File "[...]/pyroute2/netlink/__init__.py", line 1245, in encode
>> > offset = self.encode_nlas(offset)
>> >  
>> >   File "[...]/pyroute2/netlink/__init__.py", line 1560, in encode_nlas
>> > nla_instance.setvalue(cell[1])
>> >   File "[...]/pyroute2/netlink/__init__.py", line 1265, in setvalue
>> > nlv.setvalue(nla_tuple[1])
>> >  ~^^^
>> > IndexError: list index out of range
>> >
>> > Signed-off-by: Adrian Moreno 
>> > ---
>>
>> Acked-by: Aaron Conole 
>>
>> I don't know which pyroute2 version I had used when I tested this
>> previously, but even on my current system I get this error now.  Thanks
>> for the fix.
>>
>
> Thanks Aaron. I'll resend as v2 with your ack as a stand-alone patch
> since the other patch of this series will be fixed by your soon-to-come
> series.

Thanks!

>> >  tools/testing/selftests/net/openvswitch/ovs-dpctl.py | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> >
>> > diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py 
>> > b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>> > index b76907ac0092..a2395c3f37a1 100644
>> > --- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>> > +++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
>> > @@ -537,7 +537,7 @@ class ovsactions(nla):
>> >  for flat_act in parse_flat_map:
>> >  if parse_starts_block(actstr, flat_act[0], False):
>> >  actstr = actstr[len(flat_act[0]):]
>> > -self["attrs"].append([flat_act[1]])
>> > +self["attrs"].append([flat_act[1], True])
>> >  actstr = actstr[strspn(actstr, ", ") :]
>> >  parsed = True
>>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [RFC net-next 6/7] selftests: net: Use the provided dpctl rather than the vswitchd for tests.

2024-06-14 Thread Aaron Conole
Hi Stefano,

Thanks for the review!

Stefano Brivio  writes:

> On Thu, 13 Jun 2024 14:13:32 -0400
> Aaron Conole  wrote:
>
>> The current pmtu test infrastucture requires an installed copy of the
>> ovs-vswitchd userspace.  This means that any automated or constrained
>> environments may not have the requisite tools to run the tests.  However,
>> the pmtu tests don't require any special classifier processing.  Indeed
>> they are only using the vswitchd in the most basic mode - as a NORMAL
>> switch.
>> 
>> However, the ovs-dpctl kernel utility can now program all the needed basic
>> flows to allow traffic to traverse the tunnels and provide support for at
>> least testing some basic pmtu scenarios.
>
> I didn't know about that tool, that looks like a nice improvement. A
> few comments below (mostly nits):

It didn't at the time, so no worries :)

>> More complicated flow pipelines
>> can be added to the internal ovs test infrastructure, but that is work for
>> the future.  For now, enable the most common cases - wide mega flows with
>> no other prerequisites.
>> 
>> Signed-off-by: Aaron Conole 
>> ---
>>  tools/testing/selftests/net/pmtu.sh | 87 ++---
>>  1 file changed, 67 insertions(+), 20 deletions(-)
>> 
>> diff --git a/tools/testing/selftests/net/pmtu.sh 
>> b/tools/testing/selftests/net/pmtu.sh
>> index cfc84958025a..7f4f35d88dcc 100755
>> --- a/tools/testing/selftests/net/pmtu.sh
>> +++ b/tools/testing/selftests/net/pmtu.sh
>> @@ -846,22 +846,73 @@ setup_ovs_vxlan_or_geneve() {
>>  type="${1}"
>>  a_addr="${2}"
>>  b_addr="${3}"
>> +dport="6081"
>>  
>>  if [ "${type}" = "vxlan" ]; then
>> +dport="4789"
>>  opts="${opts} ttl 64 dstport 4789"
>>  opts_b="local ${b_addr}"
>>  fi
>>  
>> -run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
>> -set interface ${type}_a type=${type} \
>> -options:remote_ip=${b_addr} options:key=1 options:csum=true || 
>> return 1
>> -
>> +run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t 
>> ${type}
>
> In some restricted environments, it might actually be more convenient
> to carry around ovs-vsctl than Python with (Python) libraries.
>
> Nowadays I typically (albeit seldom) run kselftests in throw-away VM
> images made by mbuto (https://mbuto.sh/, see demo on the right), and
> while it copies python3 and dynamic libraries from the host, adding
> Python libraries such as pyroute2 gets quite complicated.
>
> So I'm wondering, if it's not too messy: could we have two functions
> starting from approximately here (say, setup_ovs_dpctl() and
> setup_ovs_vsctl()), try with ovs-dpctl first, and, if that fails,
> fall back to ovs-vsctl?

It didn't seem to be too bad - so I went ahead and made that change.  It
tested well, so I'll resubmit it with that.

>>  run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1
>> ${opts_b} remote ${a_addr} ${opts} || return 1
>>  
>>  run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev 
>> ${type}_b
>>  run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev 
>> ${type}_b
>>  
>> +run_cmd ip link set ${type}_a up
>>  run_cmd ${ns_b} ip link set ${type}_b up
>> +
>> +ports=$(python3 ./openvswitch/ovs-dpctl.py show)
>> +br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | 
>> cut -d: -f1 | xargs)
>> +type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut 
>> -d: -f1 | xargs)
>> +veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut 
>> -d: -f1 | xargs)
>> +
>> +v4_a_tun="${prefix4}.${a_r1}.1"
>> +v4_b_tun="${prefix4}.${b_r1}.1"
>> +
>> +v6_a_tun="${prefix6}:${a_r1}::1"
>> +v6_b_tun="${prefix6}:${b_r1}::1"
>> +
>> +if [ "${v4_a_tun}" == "${a_addr}" ]; then
>
> I see now that 05d92cb0e919 ("selftests/net: change shebang to bash to
> support "source"") turned this into a Bash script (for no real reason,
> lib.sh could have simply been sourced with '.' instead).
>
> Other than that, this happily runs with dash and possibly others, and:
>
>   $ checkbashisms -f pmtu.sh 
>   possible bashism in pmtu.sh line 201 (should be '.', not 'source'):
>   source lib.sh
>   possible bashism in pmtu.sh line 202 (should be '.', not 'source'):
>   source net_helper.sh
>
> Would it be possible to change this to POSIX shell:
>
>   if [ "${v4_a_tun}" = "${a_addr}" ]; then
>
> even just for consistency with the rest of the file?

done.

>> +run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
>> +
>> "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
>> +
>> "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
>> +run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
>> +
>> "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \
>> +  

Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread David Marchand
On Fri, Jun 14, 2024 at 10:17 AM Robin Jarry  wrote:
>
> David Marchand, Jun 14, 2024 at 08:48:
> > Querying link status may get delayed for an undeterministic (long) time
> > with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
> > kernel API and getting stuck on the kernel RTNL lock while some other
> > operation is in progress under this lock.
> >
> > One impact for long link status query is that it is called under the bond
> > lock taken in write mode periodically in bond_run().
> > In parallel, datapath threads may block requesting to read bonding related
> > info (like for example in bond_check_admissibility()).
> >
> > The LSC interrupt mode is available with many DPDK drivers and is used by
> > default with testpmd.
> >
> > It seems safe enough to switch on this feature by default in OVS.
> > We keep the per interface option to disable this feature in case of an
> > unforeseen bug.
> >
> > Signed-off-by: David Marchand 
> > ---
> > Changes since v1:
> > - (early) fail when interrupt lsc is requested by user but not supported
> >   by the driver,
> > - otherwise, log a debug message if user did not request interrupt mode,
>
> This looks good to me. Is there a chance that this could be backported
> to the 3.1 branch?

Well, this changes a default behavior, so it may not be welcome in a
stable branch.
On the other hand, this helps resolving (not always trivial) random
packet drops, so it is worth considering.


-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread David Marchand
On Fri, Jun 14, 2024 at 10:40 AM Robin Jarry  wrote:
>
> David Marchand, Jun 14, 2024 at 08:48:
> > diff --git a/NEWS b/NEWS
> > index 5ae0108d55..1e19beb793 100644
> > --- a/NEWS
> > +++ b/NEWS
> > @@ -9,6 +9,9 @@ Post-v3.3.0
> >   https://github.com/openvswitch/ovs.git
> > - DPDK:
> >   * OVS validated with DPDK 23.11.1.
> > + * Link status changes are now handled via interrupt mode if the DPDK
> > +   driver supports it.  It is possible to revert to polling mode by 
> > setting
> > +   per interface 'dpdk-lsc-interrupt' other_config to 'false'.
>
> Nit pick: this setting is options:dpdk-lsc-interrupt not in
> other_config.
>

Oh, indeed.. fixed in v3.
Thanks Robin.

-- 
David Marchand

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


[ovs-dev] [PATCH v3] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread David Marchand
Querying link status may get delayed for an undeterministic (long) time
with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
kernel API and getting stuck on the kernel RTNL lock while some other
operation is in progress under this lock.

One impact for long link status query is that it is called under the bond
lock taken in write mode periodically in bond_run().
In parallel, datapath threads may block requesting to read bonding related
info (like for example in bond_check_admissibility()).

The LSC interrupt mode is available with many DPDK drivers and is used by
default with testpmd.

It seems safe enough to switch on this feature by default in OVS.
We keep the per interface option to disable this feature in case of an
unforeseen bug.

Signed-off-by: David Marchand 
Reviewed-by: Robin Jarry 
Acked-by: Mike Pattrick 
---
Changes since v2:
- fixed typo in NEWS,

Changes since v1:
- (early) fail when interrupt lsc is requested by user but not supported
  by the driver,
- otherwise, log a debug message if user did not request interrupt mode,

---
 Documentation/topics/dpdk/phy.rst |  4 ++--
 NEWS  |  3 +++
 lib/netdev-dpdk.c | 13 -
 vswitchd/vswitch.xml  |  8 
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/Documentation/topics/dpdk/phy.rst 
b/Documentation/topics/dpdk/phy.rst
index efd168cba8..eefc25613d 100644
--- a/Documentation/topics/dpdk/phy.rst
+++ b/Documentation/topics/dpdk/phy.rst
@@ -546,8 +546,8 @@ the firmware every time to fulfil this request.
 
 Note that not all PMD drivers support LSC interrupts.
 
-The default configuration is polling mode. To set interrupt mode, option
-``dpdk-lsc-interrupt`` has to be set to ``true``.
+The default configuration is interrupt mode. To set polling mode, option
+``dpdk-lsc-interrupt`` has to be set to ``false``.
 
 Command to set interrupt mode for a specific interface::
 $ ovs-vsctl set interface  options:dpdk-lsc-interrupt=true
diff --git a/NEWS b/NEWS
index 5ae0108d55..d05f2d0f89 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Post-v3.3.0
  https://github.com/openvswitch/ovs.git
- DPDK:
  * OVS validated with DPDK 23.11.1.
+ * Link status changes are now handled via interrupt mode if the DPDK
+   driver supports it.  It is possible to revert to polling mode by setting
+   per interface 'options:dpdk-lsc-interrupt' to 'false'.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 0fa37d5145..a260bc8485 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -2397,7 +2397,18 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
struct smap *args,
 }
 }
 
-lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
+lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", true);
+if (lsc_interrupt_mode && !(*info.dev_flags & RTE_ETH_DEV_INTR_LSC)) {
+if (smap_get(args, "dpdk-lsc-interrupt")) {
+VLOG_ERR("interface '%s': link status interrupt is not supported.",
+ netdev_get_name(netdev));
+err = EINVAL;
+goto out;
+}
+VLOG_DBG("interface '%s': not enabling link status interrupt.",
+ netdev_get_name(netdev));
+lsc_interrupt_mode = false;
+}
 if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
 dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
 netdev_request_reconfigure(netdev);
diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
index 8a1b607d71..e3afb78a4e 100644
--- a/vswitchd/vswitch.xml
+++ b/vswitchd/vswitch.xml
@@ -4647,12 +4647,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
type=patch options:peer=p1 \
   
 
-  Set this value to true to configure interrupt mode for
-  Link State Change (LSC) detection instead of poll mode for the DPDK
-  interface.
+  Set this value to false to configure poll mode for
+  Link State Change (LSC) detection instead of interrupt mode for the
+  DPDK interface.
 
 
-  If this value is not set, poll mode is configured.
+  If this value is not set, interrupt mode is configured.
 
 
   This parameter has an effect only on netdev dpdk interfaces.
-- 
2.45.1

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread Mike Pattrick
On Fri, Jun 14, 2024 at 2:48 AM David Marchand
 wrote:
>
> Querying link status may get delayed for an undeterministic (long) time
> with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
> kernel API and getting stuck on the kernel RTNL lock while some other
> operation is in progress under this lock.
>
> One impact for long link status query is that it is called under the bond
> lock taken in write mode periodically in bond_run().
> In parallel, datapath threads may block requesting to read bonding related
> info (like for example in bond_check_admissibility()).
>
> The LSC interrupt mode is available with many DPDK drivers and is used by
> default with testpmd.
>
> It seems safe enough to switch on this feature by default in OVS.
> We keep the per interface option to disable this feature in case of an
> unforeseen bug.
>
> Signed-off-by: David Marchand 
> ---
> Changes since v1:
> - (early) fail when interrupt lsc is requested by user but not supported
>   by the driver,
> - otherwise, log a debug message if user did not request interrupt mode,
>
> ---
>  Documentation/topics/dpdk/phy.rst |  4 ++--
>  NEWS  |  3 +++
>  lib/netdev-dpdk.c | 13 -
>  vswitchd/vswitch.xml  |  8 
>  4 files changed, 21 insertions(+), 7 deletions(-)
>
> diff --git a/Documentation/topics/dpdk/phy.rst 
> b/Documentation/topics/dpdk/phy.rst
> index efd168cba8..eefc25613d 100644
> --- a/Documentation/topics/dpdk/phy.rst
> +++ b/Documentation/topics/dpdk/phy.rst
> @@ -546,8 +546,8 @@ the firmware every time to fulfil this request.
>
>  Note that not all PMD drivers support LSC interrupts.
>
> -The default configuration is polling mode. To set interrupt mode, option
> -``dpdk-lsc-interrupt`` has to be set to ``true``.
> +The default configuration is interrupt mode. To set polling mode, option
> +``dpdk-lsc-interrupt`` has to be set to ``false``.
>
>  Command to set interrupt mode for a specific interface::
>  $ ovs-vsctl set interface  options:dpdk-lsc-interrupt=true
> diff --git a/NEWS b/NEWS
> index 5ae0108d55..1e19beb793 100644
> --- a/NEWS
> +++ b/NEWS
> @@ -9,6 +9,9 @@ Post-v3.3.0
>   https://github.com/openvswitch/ovs.git
> - DPDK:
>   * OVS validated with DPDK 23.11.1.
> + * Link status changes are now handled via interrupt mode if the DPDK
> +   driver supports it.  It is possible to revert to polling mode by 
> setting
> +   per interface 'dpdk-lsc-interrupt' other_config to 'false'.

As Robin points out, other_config should be changed to options.But the
rest looks good.

Acked-by: Mike Pattrick 

>
>
>  v3.3.0 - 16 Feb 2024
> diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
> index 0fa37d5145..a260bc8485 100644
> --- a/lib/netdev-dpdk.c
> +++ b/lib/netdev-dpdk.c
> @@ -2397,7 +2397,18 @@ netdev_dpdk_set_config(struct netdev *netdev, const 
> struct smap *args,
>  }
>  }
>
> -lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", false);
> +lsc_interrupt_mode = smap_get_bool(args, "dpdk-lsc-interrupt", true);
> +if (lsc_interrupt_mode && !(*info.dev_flags & RTE_ETH_DEV_INTR_LSC)) {
> +if (smap_get(args, "dpdk-lsc-interrupt")) {
> +VLOG_ERR("interface '%s': link status interrupt is not 
> supported.",
> + netdev_get_name(netdev));
> +err = EINVAL;
> +goto out;
> +}
> +VLOG_DBG("interface '%s': not enabling link status interrupt.",
> + netdev_get_name(netdev));
> +lsc_interrupt_mode = false;
> +}
>  if (dev->requested_lsc_interrupt_mode != lsc_interrupt_mode) {
>  dev->requested_lsc_interrupt_mode = lsc_interrupt_mode;
>  netdev_request_reconfigure(netdev);
> diff --git a/vswitchd/vswitch.xml b/vswitchd/vswitch.xml
> index 8a1b607d71..e3afb78a4e 100644
> --- a/vswitchd/vswitch.xml
> +++ b/vswitchd/vswitch.xml
> @@ -4647,12 +4647,12 @@ ovs-vsctl add-port br0 p0 -- set Interface p0 
> type=patch options:peer=p1 \
>type='{"type": "boolean"}'>
>  
> -  Set this value to true to configure interrupt mode for
> -  Link State Change (LSC) detection instead of poll mode for the DPDK
> -  interface.
> +  Set this value to false to configure poll mode for
> +  Link State Change (LSC) detection instead of interrupt mode for the
> +  DPDK interface.
>  
>  
> -  If this value is not set, poll mode is configured.
> +  If this value is not set, interrupt mode is configured.
>  
>  
>This parameter has an effect only on netdev dpdk interfaces.
> --
> 2.44.0
>

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH] dpif-netdev: Disable XPS (Transmit Packet Steering) for non-pmd ports.

2024-06-14 Thread Ilya Maximets
On 6/13/24 12:15, Eli Britstein wrote:
> 
> 
>> -Original Message-
>> From: Ilya Maximets 
>> Sent: Monday, 10 June 2024 15:53
>> To: Roi Dayan ; d...@openvswitch.org
>> Cc: Eli Britstein ; Maor Dickman ;
>> i.maxim...@ovn.org
>> Subject: Re: [ovs-dev] [PATCH] dpif-netdev: Disable XPS (Transmit Packet
>> Steering) for non-pmd ports.
>>
>> External email: Use caution opening links or attachments
>>
>>
>> On 6/9/24 12:16, Roi Dayan via dev wrote:
>>> From: Eli Britstein 
>>>
>>> In the cited commit, XPS was introduced. It is NA for non-pmd ports.
>>> Upon port creation it is indeed disabled, but at port reconfigure, the
>>> condition of netdev_is_pmd() is missing.
>>> As a result, XPS is configured, and such messages are repeating in the log:
>>>   DBG|Core 2: New TX queue ID 0 for port 'v1_r'.
>>> Fix it.
>>
>> Hi, Eli.  Thanks for the patch!
>>
>> While it's maybe true that it was an original intention to not have XPS 
>> engaged
>> for non-PMD ports (frankly, I don't remember), the behavior was changed
>> quickly after in commit:
>>  e32971b8ddb4 ("dpif-netdev: Centralized threads and queues handling
>> code.") The logic was centralized in the reconfiguration code and no port is
>> actually used until it went through datapath reconfiguration.
>>
>> And later we had AF_XDP ports introduced and even afxdp-nonpmd.  For
>> these it is still important to have balanced use of Tx queues even if the 
>> port is
>> not polled by PMD threads on Rx side.
>>
>> We also changed netdev_send() API to include 'concurrent_txq' flag to make
>> the netdev implementation know if it needs to lock the queue before using.
>> Default STATIC mode doesn't set this flag.
>> This also means that we can't actually supply Tx queue IDs to netdev_send()
>> out of range of the allocated queues, since netdev implementation will have
>> to lock every time otherwise.  STATIC mode will use out-of-range queue IDs
>> with this change applied.
> Thanks for this explanation. Let me clarify the scenario:
> We add a veth port to netdev bridge (dpif-netdev). It doesn't have multiple 
> queues, so it is 1.
> The "wanted_txqs" is 2 at the minimum - one for at least one PMD, and another 
> for the main thread.
> The veth port is still configured as XPS, though there are no multiple TX 
> queues.
> When I looked back to what condition to add, I saw the is_pmd in the cited 
> commit.
> How about this then?
> 
> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c
> index 5196183ff..dac7de851 100644
> --- a/lib/dpif-netdev.c
> +++ b/lib/dpif-netdev.c
> @@ -6646,7 +6646,8 @@ reconfigure_datapath(struct dp_netdev *dp)
>  if (port->txq_requested_mode == TXQ_REQ_MODE_HASH &&
>  netdev_n_txq(port->netdev) > 1) {
>  port->txq_mode = TXQ_MODE_XPS_HASH;
> -} else if (netdev_n_txq(port->netdev) < wanted_txqs) {
> +} else if (netdev_n_txq(port->netdev) > 1 &&
> +   netdev_n_txq(port->netdev) < wanted_txqs) {
>  port->txq_mode = TXQ_MODE_XPS;
>  } else {
>  port->txq_mode = TXQ_MODE_STATIC;
> 

This will still break non-pmd ports that support multi-queue, e.g.
afxdp-nonpmd.

>>
>> With that, I don't think we can accept this change.  At the current state of 
>> the
>> netdev API, dpif-netdev should never actually use Tx queue IDs out of the
>> allocated range and it must set 'concurrent_txq' flag whenever queues can be
>> shared, otherwise we'll get data races and crashes on out-of-range memory
>> accesses.
> Do you mean TXQ_MODE_STATIC is broken regardless of this change? I guess if 
> so it
> is currently hidden as it is never used.

It's not broken, but it must not be used (and it is not being used) if number of
available queues is less than number of threads.  If you have a device with 8 
queues
and only 4 threads in OVS, TXQ_MODE_STATIC will be used and will work fine.

Why the debug log message is a problem?

>>
>> We should technically remove all the 'qid % n_txq' stuff from all the netdev
>> implementations and replace them with ovs_assert() on the API level.  We
>> had a few patches for that in the past, but they didn't get proper attention
>> and went stale.
> Maybe, but it's not related to this commit.
>>
>> Best regards, Ilya Maximets.
>>
>>>
>>> Fixes: 324c8374852a ("dpif-netdev: XPS (Transmit Packet Steering)
>>> implementation.")
>>> Signed-off-by: Eli Britstein 
>>> Acked-by: Roi Dayan 
>>> ---
>>>  lib/dpif-netdev.c | 2 +-
>>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>>
>>> diff --git a/lib/dpif-netdev.c b/lib/dpif-netdev.c index
>>> c7f9e149025e..94e1204575ea 100644
>>> --- a/lib/dpif-netdev.c
>>> +++ b/lib/dpif-netdev.c
>>> @@ -6804,7 +6804,7 @@ reconfigure_datapath(struct dp_netdev *dp)
>>>  if (port->txq_requested_mode == TXQ_REQ_MODE_HASH &&
>>>  netdev_n_txq(port->netdev) > 1) {
>>>  port->txq_mode = TXQ_MODE_XPS_HASH;
>>> -} else if (netdev_n_tx

Re: [ovs-dev] [PATCH] ofp-prop: Fix unaligned 128 bit access.

2024-06-14 Thread Ilya Maximets
On 6/12/24 20:12, Mike Pattrick wrote:
> On Wed, Jun 12, 2024 at 9:50 AM Ales Musil  wrote:
> 
>>
>>
>> On Wed, Jun 12, 2024 at 3:32 PM Mike Pattrick  wrote:
>>
>>> When compiling with '-fsanitize=address,undefined', the "ovs-ofctl
>>> ct-flush" test will yield the following undefined behavior flagged
>>> by UBSan. This patch uses memcpy to move the 128bit value into the
>>> stack before reading it.
>>>
>>> lib/ofp-prop.c:277:14: runtime error: load of misaligned address
>>> for type 'union ovs_be128', which requires 8 byte alignment
>>>   ^
>>> #0 0x7735d4 in ofpprop_parse_u128 lib/ofp-prop.c:277
>>> #1 0x6c6c83 in ofp_ct_match_decode lib/ofp-ct.c:529
>>> #2 0x76f3b5 in ofp_print_nxt_ct_flush lib/ofp-print.c:959
>>> #3 0x76f3b5 in ofp_to_string__ lib/ofp-print.c:1206
>>> #4 0x76f3b5 in ofp_to_string lib/ofp-print.c:1264
>>> #5 0x770c0d in ofp_print lib/ofp-print.c:1308
>>> #6 0x484a9d in ofctl_ofp_print utilities/ovs-ofctl.c:4899
>>> #7 0x4ddb77 in ovs_cmdl_run_command__ lib/command-line.c:247
>>> #8 0x47f6b3 in main utilities/ovs-ofctl.c:186

Thanks for cleaning up the trace.  Please, also remove the '#' tags.
GitHub trats them as references to issues/PRs and that is annoying.

Also, while most of the addresses in the trace are not important and it's
good to strip or shorten them, the actual address where the memory access
happened is important here, so we can see what the actual alignment was.
Was it part of the original error message?  Clang usually provides them,
not sure about gcc.

>>>
>>> Signed-off-by: Mike Pattrick 
>>> ---
>>>
>>
>> Hi Mike,
>>
>> this is interesting, do you have an idea why it didn't fail in CI by now?
>> Also AFAIR the ofprops is supposed to be aligned to 8 bytes so unless the
>> buffer itself isn't allocated at an address that is not itself 8 bytes
>> aligned it shouldn't happen. In that case we might actually have a problem
>> with other sizes.
>>
> 
> Report is seen with gcc + ubsan, but not clang + ubsan. It is possible that
> this is only seen due the test, this warning wasn't seen live.

I agree with Ales on this one.  Properties supposed to be aligned.
We need to find why they are not.  i.e. is it a property itself or
something before it.

We may need to take similar approach as in commit:
  a5cc859a4228 ("ofp-actions: Use aligned structures when decoding ofp 
actions.")

> 
> Cheers,
> M
> 
> 
>>
>>
>>>  lib/ofp-prop.c | 4 +++-
>>>  1 file changed, 3 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/lib/ofp-prop.c b/lib/ofp-prop.c
>>> index 0a685750c..ed6365414 100644
>>> --- a/lib/ofp-prop.c
>>> +++ b/lib/ofp-prop.c
>>> @@ -271,10 +271,12 @@ enum ofperr
>>>  ofpprop_parse_u128(const struct ofpbuf *property, ovs_u128 *value)
>>>  {
>>>  ovs_be128 *p = property->msg;
>>> +ovs_be128 aligned;
>>>  if (ofpbuf_msgsize(property) != sizeof *p) {
>>>  return OFPERR_OFPBPC_BAD_LEN;
>>>  }
>>> -*value = ntoh128(*p);
>>> +memcpy(&aligned, p, sizeof aligned);

FWIW, this doesn't actually fix the issue.  At least not in all the
cases.  Compiler is free to make alignment assumptions based on the
pointer type, so we can still have unaligned access inside the memcpy.

Best regards, Ilya Maximets.
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [BUG][meter] ovs crash when add meter openflow

2024-06-14 Thread Ilya Maximets
On 6/14/24 13:33, Simon Jones wrote:
> ```
> Date:   Fri Jun 14 19:25:43 2024 +0800
> 
> bugfix of meter tc crash.
> 
> diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
> index 9fde5f7a9..d08c5a35f 100644
> --- a/lib/netdev-offload.c
> +++ b/lib/netdev-offload.c
> @@ -186,7 +186,6 @@ netdev_assign_flow_api(struct netdev *netdev)
>  ovsrcu_set(&netdev->flow_api, rfa->flow_api);
>  VLOG_INFO("%s: Assigned flow API '%s'.",
>netdev_get_name(netdev), rfa->flow_api->type);
> -return 0;
>  }
>  VLOG_DBG("%s: flow API '%s' is not suitable.",
>   netdev_get_name(netdev), rfa->flow_api->type);
> ```
> 
> 
> Simon Jones
> 
> 
> Simon Jones  于2024年6月14日周五 19:25写道:
> 
>> Maybe reason is this:
>> ```
>> @netdev_offload_tc and @netdev_offload_dpdk will always be register.
>> Then the @meter_set api will be called.
>> The @meter_set could be called only after @init_flow_api, but the BUG
>> happens when @meter_set is called before @init_flow_api is called.
>>
>> Check these code:
>>
>> static int
>> netdev_assign_flow_api(struct netdev *netdev)
>> {
>> struct netdev_registered_flow_api *rfa;
>>
>> CMAP_FOR_EACH (rfa, cmap_node, &netdev_flow_apis) {
>> if (!rfa->flow_api->init_flow_api(netdev)) {
>> ovs_refcount_ref(&rfa->refcnt);
>> ovsrcu_set(&netdev->flow_api, rfa->flow_api);
>> VLOG_INFO("%s: Assigned flow API '%s'.",
>>   netdev_get_name(netdev), rfa->flow_api->type);
>> return 0;
>> }
>> VLOG_DBG("%s: flow API '%s' is not suitable.",
>>  netdev_get_name(netdev), rfa->flow_api->type);
>> }
>> VLOG_INFO("%s: No suitable flow API found.", netdev_get_name(netdev));
>>
>> return -1;
>> }
>>
>> ONLY one type of rfa->flow_api->init_flow_api (DPDK or TC) could be called.
>> Because after one called, then return 0.
>> ```
>>
>> So I suggest to just remove 'return 0'.

Hi, Simon.  This is not a correct thing to do.  By design, only one flow API
should be able to accept a particular port type.  If two API implementations
can accept the same port, that would be a bug.  Also, initializing more than
one API will cause resource leaks and potentially incorrect datapath behavior.

Since you're using userspace datapath, init_flow_api() from the TC 
implementation
should always fail.

What is your OVS version?

Best regards, Ilya Maximets.

>> Like this patch:
>>
>> 
>> Simon Jones
>>
>>
>> Simon Jones  于2024年6月11日周二 17:32写道:
>>
>>>
>>> Hi all,
>>>
>>> I'm using ovs-dpdk with this patch:
>>> ```
>>> commit 4c226944f7c55c9d6e7c85f7c33c5ce11c35ce54
>>> Author: Jianbo Liu 
>>> Date:   Fri Jul 8 03:06:26 2022 +
>>>
>>> netdev-offload-tc: Implement meter offload API for tc
>>> ```
>>> This patch is for offload flow meter by tc.
>>>
>>> Now I found a bug: ovs crash when add meter openflow.
>>>
>>> 1. How to produce:
>>> (NOTICE: This bug is not always reproducible.)
>>> ```
>>> Add these commands:
>>>
>>> ovs-ofctl -O OpenFlow13 add-meter br-int
>>> meter=1,kbps,band=type=drop,rate=1000
>>> ovs-ofctl -O OpenFlow13 add-flow br-int in_port=\"pf0vf0\",ip,nw_src=
>>> 16.0.0.0/24,nw_dst=48.0.0.0/24,nw_proto=17,actions=\
>>> 
>>> "meter:1,output:p0\"
>>> ovs-ofctl -O OpenFlow13 add-flow br-int
>>> in_port=\"pf0vf0\",udp6,ipv6_src=2001:4860:0:2001::/64,ipv6_dst=2001:0:4137:9350:8000:f12a:b9c8:2815,actions=\"meter:1,output:p0\"
>>>
>>> Then ovs crash, this is core file call trace:
>>>
>>> (gdb) bt
>>> #0  id_pool_alloc_id (pool=0x0, id_=id_@entry=0x7fff180c841c) at
>>> lib/id-pool.c:112
>>> #1  0x0055f0a0 in meter_alloc_police_index
>>> (police_index=0x7fff180c841c) at lib/netdev-offload-tc.c:2530
>>> #2  meter_tc_set_policer (meter_id=..., config=0x7fff180c8538) at
>>> lib/netdev-offload-tc.c:2567
>>> #3  0x004af207 in meter_offload_set (meter_id=meter_id@entry=...,
>>> config=config@entry=0x7fff180c8538) at lib/netdev-offload.c:207
>>> #4  0x00474c2b in dpif_netdev_meter_set (dpif=,
>>> meter_id=..., config=0x7fff180c8538) at lib/dpif-netdev.c:9137
>>> #5  0x0048e3fd in dpif_meter_set (dpif=0x28cfd10, meter_id=...,
>>> config=) at lib/dpif.c:1973
>>> #6  0x0042daf8 in meter_set (ofproto_=0x28cc670,
>>> meter_id=0x7fff180c8590, config=) at
>>> ofproto/ofproto-dpif.c:6751
>>> #7  0x00423e91 in handle_add_meter (mm=0x7fff180c8530,
>>> ofproto=0x28cc670) at ofproto/ofproto.c:6905
>>> #8  handle_meter_mod (ofconn=0x29073d0, oh=0x291bd40) at
>>> ofproto/ofproto.c:7013
>>> #9  0x00426d21 in handle_single_part_openflow
>>> (type=OFPTYPE_METER_MOD, oh=0x291bd40, ofconn=0x29073d0) at
>>> ofproto/ofproto.c:8668
>>> #10 handle_openflow (ofconn=0x29073d0, msgs=0x7fff180c88c0) at
>>> ofproto/ofproto.c:8843
>>> #11 0x0045b3b4 in ofconn_run (handle_openflow=0x4267b0
>>> , ofconn=0x29073d0) at ofp

[ovs-dev] [PATCH] vswitchd: Only lock pages that are faulted in.

2024-06-14 Thread Ilya Maximets
The main purpose of locking the memory is to ensure that OVS can keep
doing what it did before in case of increased memory pressure, e.g.,
during VM ingest / migration.  Fulfilling this requirement can be
achieved without locking all the allocated memory, but only the pages
already accessed in the past (faulted in).  Processing of the new
traffic involves new memory allocations.  Latency on these operations
can't be guaranteed by the locking.  The main difference would be
the pre-faulting of the stack memory.  However, in order to revalidate
or process upcalls on the same traffic, the same amount of stack is
likely needed, so all the necessary memory will already be faulted in.

Switch 'mlockall' to MCL_ONFAULT to avoid consuming unnecessarily
large amounts of RAM on systems with high core counts.  For example,
in a densely populated OVN cluster this saves about 650 MB of RAM per
node on a system with 64 cores.  This equates to 320 GB of allocated
but unused RAM in a 500 node cluster.

This also makes OVS better suited by default for small systems with
limited amount of memory.

The MCL_ONFAULT flag was introduced in Linux kernel 4.4 and wasn't
available at the time of '--mlockall' introduction, but we can use it
now.  Falling back to an old way of locking in case we're running on
an older kernel just in case.

Only locking the faulted in pages also makes locking compatible with
vhost post-copy live migration by default, because we'll no longer
pre-fault all the guest's memory.  Post-copy relies on userfaultfd
to work on shared huge pages, which is only available in 4.11+ kernels.
So, technically, it should not be possible for MCL_ONFAULT to fail and
the call without it to succeed.  But keeping the check just in case
for now.

Signed-off-by: Ilya Maximets 
---
 Documentation/ref/ovs-ctl.8.rst  |  5 +++--
 Documentation/topics/dpdk/vhost-user.rst |  6 --
 NEWS |  2 ++
 lib/netdev-dpdk.c|  2 +-
 lib/util.c   | 12 ++--
 lib/util.h   |  4 ++--
 vswitchd/ovs-vswitchd.8.in   |  9 +
 vswitchd/ovs-vswitchd.c  | 17 -
 8 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/Documentation/ref/ovs-ctl.8.rst b/Documentation/ref/ovs-ctl.8.rst
index 9f077a122..cdbaac4dc 100644
--- a/Documentation/ref/ovs-ctl.8.rst
+++ b/Documentation/ref/ovs-ctl.8.rst
@@ -170,8 +170,9 @@ The following options are less important:
 * ``--no-mlockall``
 
   By default ``ovs-ctl`` passes ``--mlockall`` to ``ovs-vswitchd``,
-  requesting that it lock all of its virtual memory, preventing it
-  from being paged to disk.  This option suppresses that behavior.
+  requesting that it lock all of its virtual memory on page fault (on
+  allocation, when running on Linux kernel 4.4 and older), preventing
+  it from being paged to disk.  This option suppresses that behavior.
 
 * ``--no-self-confinement``
 
diff --git a/Documentation/topics/dpdk/vhost-user.rst 
b/Documentation/topics/dpdk/vhost-user.rst
index 7866543d8..d9d87aa08 100644
--- a/Documentation/topics/dpdk/vhost-user.rst
+++ b/Documentation/topics/dpdk/vhost-user.rst
@@ -340,8 +340,10 @@ The default value is ``false``.
 fixes (like userfaulfd leak) was released in 3.0.1.
 
 DPDK Post-copy feature requires avoiding to populate the guest memory
-(application must not call mlock* syscall). So enabling mlockall is
-incompatible with post-copy feature.
+(application must not call mlock* syscall without MCL_ONFAULT).
+So enabling mlockall is incompatible with post-copy feature in OVS 3.3 and
+older. Newer versions of OVS only lock memory pages that are faulted in,
+so both features can be used at the same time.
 
 Note that during migration of vhost-user device, PMD threads hang for the
 time of faulted pages download from source host. Transferring 1GB hugepage
diff --git a/NEWS b/NEWS
index 5ae0108d5..66c370f20 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,7 @@
 Post-v3.3.0
 
+   - Option '--mlockall' now only locks memory pages on fault, if possible.
+ This also makes it compatible with vHost Post-copy Live Migration.
- Userspace datapath:
  * Conntrack now supports 'random' flag for selecting ports in a range
while natting and 'persistent' flag for selection of the IP address
diff --git a/lib/netdev-dpdk.c b/lib/netdev-dpdk.c
index 0fa37d514..bdc08bcf5 100644
--- a/lib/netdev-dpdk.c
+++ b/lib/netdev-dpdk.c
@@ -6704,7 +6704,7 @@ parse_vhost_config(const struct smap *ovs_other_config)
 
 vhost_postcopy_enabled = smap_get_bool(ovs_other_config,
"vhost-postcopy-support", false);
-if (vhost_postcopy_enabled && memory_locked()) {
+if (vhost_postcopy_enabled && memory_all_locked()) {
 VLOG_WARN("vhost-postcopy-support and mlockall are not compatible.");
 vhost_po

Re: [ovs-dev] [BUG][meter] ovs crash when add meter openflow

2024-06-14 Thread Simon Jones
```
Date:   Fri Jun 14 19:25:43 2024 +0800

bugfix of meter tc crash.

diff --git a/lib/netdev-offload.c b/lib/netdev-offload.c
index 9fde5f7a9..d08c5a35f 100644
--- a/lib/netdev-offload.c
+++ b/lib/netdev-offload.c
@@ -186,7 +186,6 @@ netdev_assign_flow_api(struct netdev *netdev)
 ovsrcu_set(&netdev->flow_api, rfa->flow_api);
 VLOG_INFO("%s: Assigned flow API '%s'.",
   netdev_get_name(netdev), rfa->flow_api->type);
-return 0;
 }
 VLOG_DBG("%s: flow API '%s' is not suitable.",
  netdev_get_name(netdev), rfa->flow_api->type);
```


Simon Jones


Simon Jones  于2024年6月14日周五 19:25写道:

> Maybe reason is this:
> ```
> @netdev_offload_tc and @netdev_offload_dpdk will always be register.
> Then the @meter_set api will be called.
> The @meter_set could be called only after @init_flow_api, but the BUG
> happens when @meter_set is called before @init_flow_api is called.
>
> Check these code:
>
> static int
> netdev_assign_flow_api(struct netdev *netdev)
> {
> struct netdev_registered_flow_api *rfa;
>
> CMAP_FOR_EACH (rfa, cmap_node, &netdev_flow_apis) {
> if (!rfa->flow_api->init_flow_api(netdev)) {
> ovs_refcount_ref(&rfa->refcnt);
> ovsrcu_set(&netdev->flow_api, rfa->flow_api);
> VLOG_INFO("%s: Assigned flow API '%s'.",
>   netdev_get_name(netdev), rfa->flow_api->type);
> return 0;
> }
> VLOG_DBG("%s: flow API '%s' is not suitable.",
>  netdev_get_name(netdev), rfa->flow_api->type);
> }
> VLOG_INFO("%s: No suitable flow API found.", netdev_get_name(netdev));
>
> return -1;
> }
>
> ONLY one type of rfa->flow_api->init_flow_api (DPDK or TC) could be called.
> Because after one called, then return 0.
> ```
>
> So I suggest to just remove 'return 0'.
> Like this patch:
>
> 
> Simon Jones
>
>
> Simon Jones  于2024年6月11日周二 17:32写道:
>
>>
>> Hi all,
>>
>> I'm using ovs-dpdk with this patch:
>> ```
>> commit 4c226944f7c55c9d6e7c85f7c33c5ce11c35ce54
>> Author: Jianbo Liu 
>> Date:   Fri Jul 8 03:06:26 2022 +
>>
>> netdev-offload-tc: Implement meter offload API for tc
>> ```
>> This patch is for offload flow meter by tc.
>>
>> Now I found a bug: ovs crash when add meter openflow.
>>
>> 1. How to produce:
>> (NOTICE: This bug is not always reproducible.)
>> ```
>> Add these commands:
>>
>> ovs-ofctl -O OpenFlow13 add-meter br-int
>> meter=1,kbps,band=type=drop,rate=1000
>> ovs-ofctl -O OpenFlow13 add-flow br-int in_port=\"pf0vf0\",ip,nw_src=
>> 16.0.0.0/24,nw_dst=48.0.0.0/24,nw_proto=17,actions=\
>> 
>> "meter:1,output:p0\"
>> ovs-ofctl -O OpenFlow13 add-flow br-int
>> in_port=\"pf0vf0\",udp6,ipv6_src=2001:4860:0:2001::/64,ipv6_dst=2001:0:4137:9350:8000:f12a:b9c8:2815,actions=\"meter:1,output:p0\"
>>
>> Then ovs crash, this is core file call trace:
>>
>> (gdb) bt
>> #0  id_pool_alloc_id (pool=0x0, id_=id_@entry=0x7fff180c841c) at
>> lib/id-pool.c:112
>> #1  0x0055f0a0 in meter_alloc_police_index
>> (police_index=0x7fff180c841c) at lib/netdev-offload-tc.c:2530
>> #2  meter_tc_set_policer (meter_id=..., config=0x7fff180c8538) at
>> lib/netdev-offload-tc.c:2567
>> #3  0x004af207 in meter_offload_set (meter_id=meter_id@entry=...,
>> config=config@entry=0x7fff180c8538) at lib/netdev-offload.c:207
>> #4  0x00474c2b in dpif_netdev_meter_set (dpif=,
>> meter_id=..., config=0x7fff180c8538) at lib/dpif-netdev.c:9137
>> #5  0x0048e3fd in dpif_meter_set (dpif=0x28cfd10, meter_id=...,
>> config=) at lib/dpif.c:1973
>> #6  0x0042daf8 in meter_set (ofproto_=0x28cc670,
>> meter_id=0x7fff180c8590, config=) at
>> ofproto/ofproto-dpif.c:6751
>> #7  0x00423e91 in handle_add_meter (mm=0x7fff180c8530,
>> ofproto=0x28cc670) at ofproto/ofproto.c:6905
>> #8  handle_meter_mod (ofconn=0x29073d0, oh=0x291bd40) at
>> ofproto/ofproto.c:7013
>> #9  0x00426d21 in handle_single_part_openflow
>> (type=OFPTYPE_METER_MOD, oh=0x291bd40, ofconn=0x29073d0) at
>> ofproto/ofproto.c:8668
>> #10 handle_openflow (ofconn=0x29073d0, msgs=0x7fff180c88c0) at
>> ofproto/ofproto.c:8843
>> #11 0x0045b3b4 in ofconn_run (handle_openflow=0x4267b0
>> , ofconn=0x29073d0) at ofproto/connmgr.c:1329
>> #12 connmgr_run (mgr=0x272a200, 
>> handle_openflow=handle_openflow@entry=0x4267b0
>> ) at ofproto/connmgr.c:356
>> #13 0x004209be in ofproto_run (p=0x28cc670) at
>> ofproto/ofproto.c:1964
>> #14 0x0040da44 in bridge_run__ () at vswitchd/bridge.c:3251
>> #15 0x004138ec in bridge_run () at vswitchd/bridge.c:3310
>> #16 0x0040955d in main (argc=, argv=> out>) at vswitchd/ovs-vswitchd.c:129
>>
>> ```
>>
>> 2. Check code:
>> ```
>> Notice
>> #0  id_pool_alloc_id (pool=0x0, id_=id_@entry=0x7fff180c841c) at
>> lib/id-pool.c:112
>> the @pool param is NULL.
>>
>> This is happened ONLY when @netdev_tc_init_f

Re: [ovs-dev] [BUG][meter] ovs crash when add meter openflow

2024-06-14 Thread Simon Jones
Maybe reason is this:
```
@netdev_offload_tc and @netdev_offload_dpdk will always be register.
Then the @meter_set api will be called.
The @meter_set could be called only after @init_flow_api, but the BUG
happens when @meter_set is called before @init_flow_api is called.

Check these code:

static int
netdev_assign_flow_api(struct netdev *netdev)
{
struct netdev_registered_flow_api *rfa;

CMAP_FOR_EACH (rfa, cmap_node, &netdev_flow_apis) {
if (!rfa->flow_api->init_flow_api(netdev)) {
ovs_refcount_ref(&rfa->refcnt);
ovsrcu_set(&netdev->flow_api, rfa->flow_api);
VLOG_INFO("%s: Assigned flow API '%s'.",
  netdev_get_name(netdev), rfa->flow_api->type);
return 0;
}
VLOG_DBG("%s: flow API '%s' is not suitable.",
 netdev_get_name(netdev), rfa->flow_api->type);
}
VLOG_INFO("%s: No suitable flow API found.", netdev_get_name(netdev));

return -1;
}

ONLY one type of rfa->flow_api->init_flow_api (DPDK or TC) could be called.
Because after one called, then return 0.
```

So I suggest to just remove 'return 0'.
Like this patch:


Simon Jones


Simon Jones  于2024年6月11日周二 17:32写道:

>
> Hi all,
>
> I'm using ovs-dpdk with this patch:
> ```
> commit 4c226944f7c55c9d6e7c85f7c33c5ce11c35ce54
> Author: Jianbo Liu 
> Date:   Fri Jul 8 03:06:26 2022 +
>
> netdev-offload-tc: Implement meter offload API for tc
> ```
> This patch is for offload flow meter by tc.
>
> Now I found a bug: ovs crash when add meter openflow.
>
> 1. How to produce:
> (NOTICE: This bug is not always reproducible.)
> ```
> Add these commands:
>
> ovs-ofctl -O OpenFlow13 add-meter br-int
> meter=1,kbps,band=type=drop,rate=1000
> ovs-ofctl -O OpenFlow13 add-flow br-int in_port=\"pf0vf0\",ip,nw_src=
> 16.0.0.0/24,nw_dst=48.0.0.0/24,nw_proto=17,actions=\
> 
> "meter:1,output:p0\"
> ovs-ofctl -O OpenFlow13 add-flow br-int
> in_port=\"pf0vf0\",udp6,ipv6_src=2001:4860:0:2001::/64,ipv6_dst=2001:0:4137:9350:8000:f12a:b9c8:2815,actions=\"meter:1,output:p0\"
>
> Then ovs crash, this is core file call trace:
>
> (gdb) bt
> #0  id_pool_alloc_id (pool=0x0, id_=id_@entry=0x7fff180c841c) at
> lib/id-pool.c:112
> #1  0x0055f0a0 in meter_alloc_police_index
> (police_index=0x7fff180c841c) at lib/netdev-offload-tc.c:2530
> #2  meter_tc_set_policer (meter_id=..., config=0x7fff180c8538) at
> lib/netdev-offload-tc.c:2567
> #3  0x004af207 in meter_offload_set (meter_id=meter_id@entry=...,
> config=config@entry=0x7fff180c8538) at lib/netdev-offload.c:207
> #4  0x00474c2b in dpif_netdev_meter_set (dpif=,
> meter_id=..., config=0x7fff180c8538) at lib/dpif-netdev.c:9137
> #5  0x0048e3fd in dpif_meter_set (dpif=0x28cfd10, meter_id=...,
> config=) at lib/dpif.c:1973
> #6  0x0042daf8 in meter_set (ofproto_=0x28cc670,
> meter_id=0x7fff180c8590, config=) at
> ofproto/ofproto-dpif.c:6751
> #7  0x00423e91 in handle_add_meter (mm=0x7fff180c8530,
> ofproto=0x28cc670) at ofproto/ofproto.c:6905
> #8  handle_meter_mod (ofconn=0x29073d0, oh=0x291bd40) at
> ofproto/ofproto.c:7013
> #9  0x00426d21 in handle_single_part_openflow
> (type=OFPTYPE_METER_MOD, oh=0x291bd40, ofconn=0x29073d0) at
> ofproto/ofproto.c:8668
> #10 handle_openflow (ofconn=0x29073d0, msgs=0x7fff180c88c0) at
> ofproto/ofproto.c:8843
> #11 0x0045b3b4 in ofconn_run (handle_openflow=0x4267b0
> , ofconn=0x29073d0) at ofproto/connmgr.c:1329
> #12 connmgr_run (mgr=0x272a200, handle_openflow=handle_openflow@entry=0x4267b0
> ) at ofproto/connmgr.c:356
> #13 0x004209be in ofproto_run (p=0x28cc670) at
> ofproto/ofproto.c:1964
> #14 0x0040da44 in bridge_run__ () at vswitchd/bridge.c:3251
> #15 0x004138ec in bridge_run () at vswitchd/bridge.c:3310
> #16 0x0040955d in main (argc=, argv= out>) at vswitchd/ovs-vswitchd.c:129
>
> ```
>
> 2. Check code:
> ```
> Notice
> #0  id_pool_alloc_id (pool=0x0, id_=id_@entry=0x7fff180c841c) at
> lib/id-pool.c:112
> the @pool param is NULL.
>
> This is happened ONLY when @netdev_tc_init_flow_api is NOT called.
> But in code, ONLY after @netdev_tc_init_flow_api is called, the
> @handle_meter_mod could works on netdev with system type.
> ```
>
> 3. My question:
> - How this BUG happen?
> - Is there patch to solve this BUG?
>
>
> 
> Simon Jones
>
___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [PATCH v2] ofproto: Add upcall/dump-ufid-rules command to map UFIDs to OpenFlow.

2024-06-14 Thread Dumitru Ceara
On 6/13/24 23:01, Mike Pattrick wrote:
> On Tue, Jun 4, 2024 at 7:29 AM Dumitru Ceara  wrote:
>>
>> It improves the debugging experience if we can easily get a list of
>> OpenFlow rules and groups that contribute to the creation of a datapath
>> flow.
>>
>> The suggested workflow is:
>> a. dump datapath flows (along with UUIDs), this also prints the core IDs
>> (PMD IDs) when applicable.
>>
>>   $ ovs-appctl dpctl/dump-flows -m
>>   flow-dump from pmd on cpu core: 7
>>   ufid:7460db8f..., recirc_id(0), 
>>
>> b. dump related OpenFlow rules and groups:
>>   $ ovs-appctl upcall/dump-ufid-rules ufid:7460db8f... pmd=7
>>   cookie=0x12345678, table=0 
>> priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
>>   cookie=0x0, table=1 priority=200,actions=group:1
>>   group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
>>   cookie=0x0, table=2 actions=output:1
>>
>> The new command only shows rules and groups attached to ukeys that are
>> in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
>> other ukeys should not be relevant for the use case presented above.
>>
>> For ukeys that don't have an xcache populated yet, the command goes
>> ahead and populates one.  In theory this is creates a slight overhead as
>> those ukeys might not need an xcache until they see traffic (and get
>> revalidated) but in practice the overhead should be minimal.
>>
>> This commit tries to mimic the output format of the ovs-ofctl
>> dump-flows/dump-groups commands.  For groups it actually uses
>> ofputil_group/_bucket functions for formatting.  For rules it uses
>> flow_stats_ds() (the original function was exported and renamed to
>> ofproto_rule_stats_ds()).
>>
>> Signed-off-by: Dumitru Ceara 
>> ---
>> V2:
>> - Addressed Adrian's comments:
>>   - check return value of populate_xcache()
>>   - use flow_stats_ds() (renamed to ofproto_rule_stats_ds()) instead of
>> custom printing
>>   - move ukey->state check to caller
>>   - handle case when group bucket is not available
>>   - update test case to cover the point above
>> ---
>>  NEWS|   3 +
>>  include/openvswitch/ofp-group.h |   7 ++
>>  lib/ofp-group.c | 110 +++-
>>  ofproto/ofproto-dpif-upcall.c   |  85 
>>  ofproto/ofproto-dpif.c  |  24 +++
>>  ofproto/ofproto-dpif.h  |   2 +
>>  ofproto/ofproto-provider.h  |   1 +
>>  ofproto/ofproto.c   |  85 
>>  tests/ofproto-dpif.at   |  47 ++
>>  tests/ofproto-macros.at |   4 ++
>>  10 files changed, 282 insertions(+), 86 deletions(-)
>>
>> diff --git a/NEWS b/NEWS
>> index 5ae0108d552b..1bc085f97045 100644
>> --- a/NEWS
>> +++ b/NEWS
>> @@ -9,6 +9,9 @@ Post-v3.3.0
>>   https://github.com/openvswitch/ovs.git
>> - DPDK:
>>   * OVS validated with DPDK 23.11.1.
>> +   - ovs-appctl:
>> + * Added 'upcall/dump-ufid-rules' to output the set of OpenFlow rules 
>> and
>> +   groups that contributed to the creation of a specific datapath flow.
>>
>>
>>  v3.3.0 - 16 Feb 2024
>> diff --git a/include/openvswitch/ofp-group.h 
>> b/include/openvswitch/ofp-group.h
>> index cd7af0ebff9c..79fcb3a4c0d1 100644
>> --- a/include/openvswitch/ofp-group.h
>> +++ b/include/openvswitch/ofp-group.h
>> @@ -70,6 +70,11 @@ struct ofputil_bucket *ofputil_bucket_find(const struct 
>> ovs_list *,
>>  bool ofputil_bucket_check_duplicate_id(const struct ovs_list *);
>>  struct ofputil_bucket *ofputil_bucket_list_front(const struct ovs_list *);
>>  struct ofputil_bucket *ofputil_bucket_list_back(const struct ovs_list *);
>> +void ofputil_bucket_format(const struct ofputil_bucket *,
>> +   enum ofp11_group_type, enum ofp_version,
>> +   const struct ofputil_port_map *,
>> +   const struct ofputil_table_map *,
>> +   struct ds *);
>>
>>  static inline bool
>>  ofputil_bucket_has_liveness(const struct ofputil_bucket *bucket)
>> @@ -88,6 +93,8 @@ struct ofputil_group_props {
>>  void ofputil_group_properties_destroy(struct ofputil_group_props *);
>>  void ofputil_group_properties_copy(struct ofputil_group_props *to,
>> const struct ofputil_group_props *from);
>> +void ofputil_group_properties_format(const struct ofputil_group_props *,
>> + struct ds *);
>>  /* Protocol-independent group_mod. */
>>  struct ofputil_group_mod {
>>  uint16_t command; /* One of OFPGC15_*. */
>> diff --git a/lib/ofp-group.c b/lib/ofp-group.c
>> index 737f48047b10..28504c068c60 100644
>> --- a/lib/ofp-group.c
>> +++ b/lib/ofp-group.c
>> @@ -58,14 +58,16 @@ ofputil_group_from_string(const char *s, uint32_t 
>> *group_idp)
>>  return true;
>>  }
>>
>> -/* Appends to 's' a string representation of the OpenFlow group ID 
>> 'group_id'.
>> - * Most groups' string rep

[ovs-dev] [PATCH v3] ofproto: Add upcall/dump-ufid-rules command to map UFIDs to OpenFlow.

2024-06-14 Thread Dumitru Ceara
It improves the debugging experience if we can easily get a list of
OpenFlow rules and groups that contribute to the creation of a datapath
flow.

The suggested workflow is:
a. dump datapath flows (along with UUIDs), this also prints the core IDs
(PMD IDs) when applicable.

  $ ovs-appctl dpctl/dump-flows -m
  flow-dump from pmd on cpu core: 7
  ufid:7460db8f..., recirc_id(0), 

b. dump related OpenFlow rules and groups:
  $ ovs-appctl upcall/dump-ufid-rules ufid:7460db8f... pmd=7
  cookie=0x12345678, table=0 
priority=100,ip,in_port=2,nw_dst=10.0.0.2,actions=resubmit(,1)
  cookie=0x0, table=1 priority=200,actions=group:1
  group_id=1,bucket=bucket_id:0,actions=ct(commit,table=2,nat(dst=20.0.0.2))
  cookie=0x0, table=2 actions=output:1

The new command only shows rules and groups attached to ukeys that are
in states UKEY_VISIBLE or UKEY_OPERATIONAL.  That should be fine as all
other ukeys should not be relevant for the use case presented above.

For ukeys that don't have an xcache populated yet, the command goes
ahead and populates one.  In theory this is creates a slight overhead as
those ukeys might not need an xcache until they see traffic (and get
revalidated) but in practice the overhead should be minimal.

This commit tries to mimic the output format of the ovs-ofctl
dump-flows/dump-groups commands.  For groups it actually uses
ofputil_group/_bucket functions for formatting.  For rules it uses
flow_stats_ds() (the original function was exported and renamed to
ofproto_rule_stats_ds()).

Signed-off-by: Dumitru Ceara 
---
V3:
- Addressed Mike's comments:
  - use ds_put_char()
  - make the test less flaky
V2:
- Addressed Adrian's comments:
  - check return value of populate_xcache()
  - use flow_stats_ds() (renamed to ofproto_rule_stats_ds()) instead of
custom printing
  - move ukey->state check to caller
  - handle case when group bucket is not available
  - update test case to cover the point above
---
 NEWS|   3 +
 include/openvswitch/ofp-group.h |   7 ++
 lib/ofp-group.c | 110 +++-
 ofproto/ofproto-dpif-upcall.c   |  85 
 ofproto/ofproto-dpif.c  |  24 +++
 ofproto/ofproto-dpif.h  |   2 +
 ofproto/ofproto-provider.h  |   1 +
 ofproto/ofproto.c   |  85 
 tests/ofproto-dpif.at   |  47 ++
 tests/ofproto-macros.at |   9 ++-
 10 files changed, 285 insertions(+), 88 deletions(-)

diff --git a/NEWS b/NEWS
index 5ae0108d552b..1bc085f97045 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Post-v3.3.0
  https://github.com/openvswitch/ovs.git
- DPDK:
  * OVS validated with DPDK 23.11.1.
+   - ovs-appctl:
+ * Added 'upcall/dump-ufid-rules' to output the set of OpenFlow rules and
+   groups that contributed to the creation of a specific datapath flow.
 
 
 v3.3.0 - 16 Feb 2024
diff --git a/include/openvswitch/ofp-group.h b/include/openvswitch/ofp-group.h
index cd7af0ebff9c..79fcb3a4c0d1 100644
--- a/include/openvswitch/ofp-group.h
+++ b/include/openvswitch/ofp-group.h
@@ -70,6 +70,11 @@ struct ofputil_bucket *ofputil_bucket_find(const struct 
ovs_list *,
 bool ofputil_bucket_check_duplicate_id(const struct ovs_list *);
 struct ofputil_bucket *ofputil_bucket_list_front(const struct ovs_list *);
 struct ofputil_bucket *ofputil_bucket_list_back(const struct ovs_list *);
+void ofputil_bucket_format(const struct ofputil_bucket *,
+   enum ofp11_group_type, enum ofp_version,
+   const struct ofputil_port_map *,
+   const struct ofputil_table_map *,
+   struct ds *);
 
 static inline bool
 ofputil_bucket_has_liveness(const struct ofputil_bucket *bucket)
@@ -88,6 +93,8 @@ struct ofputil_group_props {
 void ofputil_group_properties_destroy(struct ofputil_group_props *);
 void ofputil_group_properties_copy(struct ofputil_group_props *to,
const struct ofputil_group_props *from);
+void ofputil_group_properties_format(const struct ofputil_group_props *,
+ struct ds *);
 /* Protocol-independent group_mod. */
 struct ofputil_group_mod {
 uint16_t command; /* One of OFPGC15_*. */
diff --git a/lib/ofp-group.c b/lib/ofp-group.c
index 737f48047b10..0be453d15203 100644
--- a/lib/ofp-group.c
+++ b/lib/ofp-group.c
@@ -58,14 +58,16 @@ ofputil_group_from_string(const char *s, uint32_t 
*group_idp)
 return true;
 }
 
-/* Appends to 's' a string representation of the OpenFlow group ID 'group_id'.
- * Most groups' string representation is just the number, but for special
- * groups, e.g. OFPG_ALL, it is the name, e.g. "ALL". */
+/* Appends to 's' a string representation of the OpenFlow group 'group_id'.
+ * Most groups' string representation is just 'group_id=' followed by the ID,
+ * but for special groups, e.g. OFPG_ALL, the ID is replaced by the name

Re: [ovs-dev] [RFC net-next 6/7] selftests: net: Use the provided dpctl rather than the vswitchd for tests.

2024-06-14 Thread Stefano Brivio
On Thu, 13 Jun 2024 14:13:32 -0400
Aaron Conole  wrote:

> The current pmtu test infrastucture requires an installed copy of the
> ovs-vswitchd userspace.  This means that any automated or constrained
> environments may not have the requisite tools to run the tests.  However,
> the pmtu tests don't require any special classifier processing.  Indeed
> they are only using the vswitchd in the most basic mode - as a NORMAL
> switch.
> 
> However, the ovs-dpctl kernel utility can now program all the needed basic
> flows to allow traffic to traverse the tunnels and provide support for at
> least testing some basic pmtu scenarios.

I didn't know about that tool, that looks like a nice improvement. A
few comments below (mostly nits):

> More complicated flow pipelines
> can be added to the internal ovs test infrastructure, but that is work for
> the future.  For now, enable the most common cases - wide mega flows with
> no other prerequisites.
> 
> Signed-off-by: Aaron Conole 
> ---
>  tools/testing/selftests/net/pmtu.sh | 87 ++---
>  1 file changed, 67 insertions(+), 20 deletions(-)
> 
> diff --git a/tools/testing/selftests/net/pmtu.sh 
> b/tools/testing/selftests/net/pmtu.sh
> index cfc84958025a..7f4f35d88dcc 100755
> --- a/tools/testing/selftests/net/pmtu.sh
> +++ b/tools/testing/selftests/net/pmtu.sh
> @@ -846,22 +846,73 @@ setup_ovs_vxlan_or_geneve() {
>   type="${1}"
>   a_addr="${2}"
>   b_addr="${3}"
> + dport="6081"
>  
>   if [ "${type}" = "vxlan" ]; then
> + dport="4789"
>   opts="${opts} ttl 64 dstport 4789"
>   opts_b="local ${b_addr}"
>   fi
>  
> - run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
> - set interface ${type}_a type=${type} \
> - options:remote_ip=${b_addr} options:key=1 options:csum=true || 
> return 1
> -
> + run_cmd python3 ./openvswitch/ovs-dpctl.py add-if ovs_br0 ${type}_a -t 
> ${type}

In some restricted environments, it might actually be more convenient
to carry around ovs-vsctl than Python with (Python) libraries.

Nowadays I typically (albeit seldom) run kselftests in throw-away VM
images made by mbuto (https://mbuto.sh/, see demo on the right), and
while it copies python3 and dynamic libraries from the host, adding
Python libraries such as pyroute2 gets quite complicated.

So I'm wondering, if it's not too messy: could we have two functions
starting from approximately here (say, setup_ovs_dpctl() and
setup_ovs_vsctl()), try with ovs-dpctl first, and, if that fails,
fall back to ovs-vsctl?

>   run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} 
> remote ${a_addr} ${opts} || return 1
>  
>   run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev 
> ${type}_b
>   run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev 
> ${type}_b
>  
> + run_cmd ip link set ${type}_a up
>   run_cmd ${ns_b} ip link set ${type}_b up
> +
> + ports=$(python3 ./openvswitch/ovs-dpctl.py show)
> + br0_port=$(echo "$ports" | grep -E "\sovs_br0" | sed -e 's@port @@' | 
> cut -d: -f1 | xargs)
> + type_a_port=$(echo "$ports" | grep ${type}_a | sed -e 's@port @@' | cut 
> -d: -f1 | xargs)
> + veth_a_port=$(echo "$ports" | grep veth_A | sed -e 's@port @@' | cut 
> -d: -f1 | xargs)
> +
> + v4_a_tun="${prefix4}.${a_r1}.1"
> + v4_b_tun="${prefix4}.${b_r1}.1"
> +
> + v6_a_tun="${prefix6}:${a_r1}::1"
> + v6_b_tun="${prefix6}:${b_r1}::1"
> +
> + if [ "${v4_a_tun}" == "${a_addr}" ]; then

I see now that 05d92cb0e919 ("selftests/net: change shebang to bash to
support "source"") turned this into a Bash script (for no real reason,
lib.sh could have simply been sourced with '.' instead).

Other than that, this happily runs with dash and possibly others, and:

  $ checkbashisms -f pmtu.sh 
  possible bashism in pmtu.sh line 201 (should be '.', not 'source'):
  source lib.sh
  possible bashism in pmtu.sh line 202 (should be '.', not 'source'):
  source net_helper.sh

Would it be possible to change this to POSIX shell:

if [ "${v4_a_tun}" = "${a_addr}" ]; then

even just for consistency with the rest of the file?

> + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
> + 
> "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x0800),ipv4()" \
> + 
> "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
> + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
> + 
> "recirc_id(0),in_port(${veth_a_port}),eth(),eth_type(0x86dd),ipv6()" \
> + 
> "set(tunnel(tun_id=1,dst=${v4_b_tun},ttl=64,tp_dst=${dport},flags(df|csum))),${type_a_port}"
> + run_cmd python3 ./openvswitch/ovs-dpctl.py add-flow ovs_br0 \
> + 
> "recirc_id(0),tunnel(tun_id=1,src=${v4_b_tun},dst=${v4_a_tun}),in_port(${type_a_port}),eth(),eth_type(0x0800),ipv4()"
>  \
> + 

Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread Robin Jarry

David Marchand, Jun 14, 2024 at 08:48:

diff --git a/NEWS b/NEWS
index 5ae0108d55..1e19beb793 100644
--- a/NEWS
+++ b/NEWS
@@ -9,6 +9,9 @@ Post-v3.3.0
  https://github.com/openvswitch/ovs.git
- DPDK:
  * OVS validated with DPDK 23.11.1.
+ * Link status changes are now handled via interrupt mode if the DPDK
+   driver supports it.  It is possible to revert to polling mode by setting
+   per interface 'dpdk-lsc-interrupt' other_config to 'false'.


Nit pick: this setting is options:dpdk-lsc-interrupt not in 
other_config.


___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v4] odp-execute: Fix AVX checksum calculation.

2024-06-14 Thread Eelco Chaudron

> Op 14 jun 2024 om 10:13 heeft Phelan, Michael  het 
> volgende geschreven:
> 
> 
>> 
>> -Original Message-
>> From: Eelco Chaudron 
>> Sent: Thursday, June 13, 2024 12:45 PM
>> To: Finn, Emma ; Phelan, Michael
>> 
>> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
>> Haaren, Harry 
>> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
>> 
>> 
>> 
>> On 12 Jun 2024, at 12:42, Finn, Emma wrote:
>> 
 -Original Message-
>>> 
 From: Eelco Chaudron 
>>> 
 Sent: Thursday, May 30, 2024 2:44 PM
>>> 
 To: Finn, Emma ; Phelan, Michael
>>> 
 
>>> 
 Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
>>> 
 Haaren, Harry 
>>> 
 Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
>>> 
 
>>> 
 
>>> 
 
>>> 
> On 30 May 2024, at 15:28, Eelco Chaudron wrote:
>>> 
 
>>> 
>> On 30 May 2024, at 14:46, Finn, Emma wrote:
>>> 
> 
>>> 
>>> -Original Message-
>>> 
>>> From: Eelco Chaudron
>> mailto:echau...@redhat.com>>
>>> 
>>> Sent: Wednesday, May 29, 2024 3:23 PM
>>> 
>>> To: Finn, Emma
>> mailto:emma.f...@intel.com>>
>>> 
>>> Cc: Ilya Maximets mailto:i.maxim...@ovn.org>>
>> ; ovs-dev@openvswitch.org ; Van
>>> 
>>> Haaren, Harry
>> mailto:harry.van.haa...@intel.com>>
>>> 
>>> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
>>> 
>>> 
>>> 
>>> 
>>> 
>>> 
>>> 
 On 29 May 2024, at 14:51, Ilya Maximets wrote:
>>> 
>>> 
>>> 
> On 5/29/24 11:01, Eelco Chaudron wrote:
>>> 
> 
>>> 
> 
>>> 
>> On 28 May 2024, at 16:49, Ilya Maximets wrote:
>>> 
> 
>>> 
>>> On 5/28/24 14:36, Eelco Chaudron wrote:
>>> 
>>> 
>>> 
>>> 
>>> 
 On 24 May 2024, at 11:20, Emma Finn wrote:
>>> 
>>> 
>>> 
 The AVX implementation for calcualting checksums was not
>>> 
 handling carry-over addition correctly in some cases.
>>> 
 This patch adds an additional shuffle to add 16-bit padding to
>>> 
 the final part of the calculation to handle such cases. This
>>> 
 commit also adds a unit test to check the checksum carry-bits
>>> 
 issue with actions autovalidator enabled.
>>> 
>>> 
>>> 
>>> Hi Emma,
>>> 
>>> 
>>> 
>>> I made the small changes, and did some more testing before I
>> committed.
>>> 
>>> However, there are more failures in the same area with or without your
>>> 
 patch.
>>> 
>>> I’m holding of committing this patch as it might be related.
>>> 
>>> 
>>> 
>> 
>>> 
>> Hi Eelco,
>>> 
>> 
>>> 
>> These tests are unrelated to this patch so I think we should go ahead and
>>> 
 merge this.
>>> 
> 
>>> 
> Ok, I’ll go ahead and apply it later today.
>>> 
> 
>>> 
>>> The failing tests are (on latest main branch):
>>> 
>>> 
>>> 
>>> 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED
>>> 
>>> (ofproto.at:6668)
>>> 
>> 
>>> 
>> I investigated this test and the SIMD implementation isn't handling 
>> traffic
>>> 
 class field correctly. I'm on PTO for the next week but I will make a fix 
 for this
>>> 
 once I'm back.
>>> 
> 
>>> 
> Thanks!
>>> 
> 
>>> 
>>> 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe
>>> 
>>> FAILED
>>> 
>>> (nsh.at:816)
>>> 
>>> 
>>> 
>> For this one it looks like the scalar is expecting an ipv4 checksum of 
>> 0x000
>>> 
 and the SIMD implementation has calculated an ipv4 checksum of 0xDF77.
>>> 
>> This is more a logic question whether or not the checksum should be
>>> 
 calculated for this? Thoughts?
>>> 
> 
>>> 
> I need to look at the tests, but if it’s a UDP packet, and the original 
> UDP
>>> 
 checksum was 0, it should stay zero.
>>> 
 
>>> 
 
>>> 
 In addition, any idea why these tests do not fail in Intel’s upstream unit
>> tests?
>>> 
 Do they use different hardware? Copied in Michael, maybe he knows more
>>> 
 about the setup/tests.
>>> 
 
>>> 
 //Eelco
>>> 
 
>>> 
>>> 
>>> 
>>> I have investigated both unit test failures.
>>> 
>>> 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED
>> (ofproto.at:6668)
>>> 
>>> For this one, the AVX implementation didn't handle setting the IPv6 traffic
>> class field.
>>> 
>>> 
>>> 
>>> 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe FAILED
>> (nsh.at:816)
>>> 
>>> For this one, the AVX implementation was missing a check for IPv4 checksum
>> offload flag.
>>> 
>>> I have 2 separate patches to fix these issues and will send shortly.
>> 
>> Thanks Emma, I’ll review them next week, as I’m out at a conference (and a 
>> lot
>> of internal meetings).
>> 
>>> As for the Intel unit test CI (ovsrobot/intel-ovs-compilation), make check 
>>> is
>> never run with
>>> 
>>> any of the AVX autovalidators enab

Re: [ovs-dev] [PATCH v2] netdev-dpdk: Use LSC interrupt mode.

2024-06-14 Thread Robin Jarry

David Marchand, Jun 14, 2024 at 08:48:

Querying link status may get delayed for an undeterministic (long) time
with mlx5 ports. This is a consequence of the mlx5 driver calling ethtool
kernel API and getting stuck on the kernel RTNL lock while some other
operation is in progress under this lock.

One impact for long link status query is that it is called under the bond
lock taken in write mode periodically in bond_run().
In parallel, datapath threads may block requesting to read bonding related
info (like for example in bond_check_admissibility()).

The LSC interrupt mode is available with many DPDK drivers and is used by
default with testpmd.

It seems safe enough to switch on this feature by default in OVS.
We keep the per interface option to disable this feature in case of an
unforeseen bug.

Signed-off-by: David Marchand 
---
Changes since v1:
- (early) fail when interrupt lsc is requested by user but not supported
  by the driver,
- otherwise, log a debug message if user did not request interrupt mode,


This looks good to me. Is there a chance that this could be backported 
to the 3.1 branch?


Reviewed-by: Robin Jarry 

Thanks David!

___
dev mailing list
d...@openvswitch.org
https://mail.openvswitch.org/mailman/listinfo/ovs-dev


Re: [ovs-dev] [v4] odp-execute: Fix AVX checksum calculation.

2024-06-14 Thread Phelan, Michael
> -Original Message-
> From: Eelco Chaudron 
> Sent: Thursday, June 13, 2024 12:45 PM
> To: Finn, Emma ; Phelan, Michael
> 
> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
> Haaren, Harry 
> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
> 
> 
> 
> On 12 Jun 2024, at 12:42, Finn, Emma wrote:
> 
> >> -Original Message-
> >
> >> From: Eelco Chaudron 
> >
> >> Sent: Thursday, May 30, 2024 2:44 PM
> >
> >> To: Finn, Emma ; Phelan, Michael
> >
> >> 
> >
> >> Cc: Ilya Maximets ; ovs-dev@openvswitch.org; Van
> >
> >> Haaren, Harry 
> >
> >> Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
> >
> >>
> >
> >>
> >
> >>
> >
> >> On 30 May 2024, at 15:28, Eelco Chaudron wrote:
> >
> >>
> >
> >>> On 30 May 2024, at 14:46, Finn, Emma wrote:
> >
> >>>
> >
> > -Original Message-
> >
> > From: Eelco Chaudron
> mailto:echau...@redhat.com>>
> >
> > Sent: Wednesday, May 29, 2024 3:23 PM
> >
> > To: Finn, Emma
> mailto:emma.f...@intel.com>>
> >
> > Cc: Ilya Maximets mailto:i.maxim...@ovn.org>>
> ; ovs-dev@openvswitch.org ; Van
> >
> > Haaren, Harry
> mailto:harry.van.haa...@intel.com>>
> >
> > Subject: Re: [v4] odp-execute: Fix AVX checksum calculation.
> >
> >
> >
> >
> >
> >
> >
> > On 29 May 2024, at 14:51, Ilya Maximets wrote:
> >
> >
> >
> >> On 5/29/24 11:01, Eelco Chaudron wrote:
> >
> >>>
> >
> >>>
> >
> >>> On 28 May 2024, at 16:49, Ilya Maximets wrote:
> >
> >>>
> >
>  On 5/28/24 14:36, Eelco Chaudron wrote:
> >
> >
> >
> >
> >
> > On 24 May 2024, at 11:20, Emma Finn wrote:
> >
> >
> >
> >> The AVX implementation for calcualting checksums was not
> >
> >> handling carry-over addition correctly in some cases.
> >
> >> This patch adds an additional shuffle to add 16-bit padding to
> >
> >> the final part of the calculation to handle such cases. This
> >
> >> commit also adds a unit test to check the checksum carry-bits
> >
> >> issue with actions autovalidator enabled.
> >
> >
> >
> > Hi Emma,
> >
> >
> >
> > I made the small changes, and did some more testing before I
> committed.
> >
> > However, there are more failures in the same area with or without your
> >
> >> patch.
> >
> > I’m holding of committing this patch as it might be related.
> >
> >
> >
> 
> >
>  Hi Eelco,
> >
> 
> >
>  These tests are unrelated to this patch so I think we should go ahead and
> >
> >> merge this.
> >
> >>>
> >
> >>> Ok, I’ll go ahead and apply it later today.
> >
> >>>
> >
> > The failing tests are (on latest main branch):
> >
> >
> >
> > 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED
> >
> > (ofproto.at:6668)
> >
> 
> >
>  I investigated this test and the SIMD implementation isn't handling 
>  traffic
> >
> >> class field correctly. I'm on PTO for the next week but I will make a fix 
> >> for this
> >
> >> once I'm back.
> >
> >>>
> >
> >>> Thanks!
> >
> >>>
> >
> > 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe
> >
> > FAILED
> >
> > (nsh.at:816)
> >
> >
> >
>  For this one it looks like the scalar is expecting an ipv4 checksum of 
>  0x000
> >
> >> and the SIMD implementation has calculated an ipv4 checksum of 0xDF77.
> >
>  This is more a logic question whether or not the checksum should be
> >
> >> calculated for this? Thoughts?
> >
> >>>
> >
> >>> I need to look at the tests, but if it’s a UDP packet, and the original 
> >>> UDP
> >
> >> checksum was 0, it should stay zero.
> >
> >>
> >
> >>
> >
> >> In addition, any idea why these tests do not fail in Intel’s upstream unit
> tests?
> >
> >> Do they use different hardware? Copied in Michael, maybe he knows more
> >
> >> about the setup/tests.
> >
> >>
> >
> >> //Eelco
> >
> >>
> >
> >
> >
> > I have investigated both unit test failures.
> >
> > 1064: ofproto - implicit mask of ipv6 proto with HOPOPT field FAILED
> (ofproto.at:6668)
> >
> > For this one, the AVX implementation didn't handle setting the IPv6 traffic
> class field.
> >
> >
> >
> > 2615: nsh - triangle PTAP bridge setup with NSH over vxlan-gpe FAILED
> (nsh.at:816)
> >
> > For this one, the AVX implementation was missing a check for IPv4 checksum
> offload flag.
> >
> > I have 2 separate patches to fix these issues and will send shortly.
> 
> Thanks Emma, I’ll review them next week, as I’m out at a conference (and a lot
> of internal meetings).
> 
> > As for the Intel unit test CI (ovsrobot/intel-ovs-compilation), make check 
> > is
> never run with
> >
> > any of the AVX autovalidators enabled. Table below shows the 4 builds and
> the unit tests ran
> >
> > after each build.
> 
> I guess it would be good to add the “make check” to the runs below. Michael
> would you be able to set this up?
Hi Eelco,
Yes, I can add make check to all of the runs 

[ovs-dev] [BUG] Why snat action NOT work while using non-DPDK port in openflow

2024-06-14 Thread Simon Jones
Hi all,

I'm using ovs-dpdk(ovs 2.17.2, dpdk 21.11.1).

Now I found a bug: Why snat action NOT work while using non-DPDK port in
openflow.

1. how to reproduce
```
### add bridge and port link this

[root@bogon ~]# ovs-vsctl show
272472f6-3ab9-4cd5-b1dd-76a63f0e6127
Bridge brp0
datapath_type: netdev
Port pf0hpf
Interface pf0hpf
type: dpdk
options: {dpdk-devargs=":01:00.0,representor=[65535]"}
Port brp0
Interface brp0
type: internal
Port brp0-patch
Interface brp0-patch
type: patch
options: {peer=br-int-patch}
Port p0
Interface p0
type: dpdk
options: {dpdk-devargs=":01:00.0"}
Bridge br-int
datapath_type: netdev
Port s0
Interface s0
Port br-int-patch
Interface br-int-patch
type: patch
options: {peer=brp0-patch}
Port pf0vf2
Interface pf0vf2
type: dpdk
options: {dpdk-devargs=":01:00.0,representor=[2]"}
Port pf0vf0
Interface pf0vf0
type: dpdk
options: {dpdk-devargs=":01:00.0,representor=[0]"}
Port pf0vf1
Interface pf0vf1
type: dpdk
options: {dpdk-devargs=":01:00.0,representor=[1]"}
Port geneve1
Interface geneve1
type: geneve
options: {key=flow, remote_ip="10.0.0.2"}
Port pf0vf3
Interface pf0vf3
type: dpdk
options: {dpdk-devargs=":01:00.0,representor=[3]"}
Port br-int
Interface br-int
type: internal
ovs_version: "2.17.2"

[root@bogon ~]#
[root@bogon ~]# ovs-ofctl dump-flows br-int
 cookie=0x0, duration=4908.200s, table=0, n_packets=25, n_bytes=1898,
priority=20,ip,metadata=0x1,in_port=s0,dl_dst=00:00:00:33:00:01,nw_dst=10.0.0.2
actions=dec_ttl,mod_dl_src:00:00:00:00:02:04,resubmit(,1)
 cookie=0x0, duration=4908.178s, table=0, n_packets=25, n_bytes=1898,
priority=30,ip,metadata=0x2,in_port=s0,dl_dst=00:00:00:00:02:05,nw_dst=10.0.0.2
actions=dec_ttl,mod_dl_src:00:00:00:00:04:01,resubmit(,1)
 cookie=0x0, duration=4908.141s, table=0, n_packets=2, n_bytes=196,
priority=20,ip,metadata=0x2,in_port="br-int-patch",dl_dst=00:00:00:00:04:01,nw_dst=10.0.0.3
actions=ct(table=1,nat)
 cookie=0x0, duration=4908.111s, table=0, n_packets=2, n_bytes=196,
priority=30,ip,metadata=0x1,in_port="br-int-patch",dl_dst=00:00:00:00:02:04,nw_dst=10.100.0.2
actions=dec_ttl,mod_dl_src:00:00:00:33:00:01,resubmit(,1)
 cookie=0x0, duration=4908.368s, table=0, n_packets=7, n_bytes=294,
priority=10,arp,in_port=s0 actions=resubmit(,1)
 cookie=0x0, duration=4908.271s, table=0, n_packets=1, n_bytes=60,
priority=10,arp,in_port="br-int-patch" actions=resubmit(,1)
 cookie=0x0, duration=4908.206s, table=0, n_packets=25, n_bytes=1898,
priority=10,ip,in_port=s0,dl_dst=00:00:00:33:00:01,nw_dst=10.0.0.2
actions=clone(load:0x1->OXM_OF_METADATA[],resubmit(,0))
 cookie=0x0, duration=4908.148s, table=0, n_packets=2, n_bytes=196,
priority=10,ip,in_port="br-int-patch",dl_dst=00:00:00:00:04:01,nw_dst=10.0.0.3
actions=clone(load:0x2->OXM_OF_METADATA[],resubmit(,0))
 cookie=0x0, duration=4908.193s, table=1, n_packets=25, n_bytes=1898,
priority=20,ip,metadata=0x1,in_port=s0,dl_dst=00:00:00:33:00:01,nw_dst=10.0.0.2
actions=mod_dl_dst:00:00:00:00:02:05,resubmit(,2)
 cookie=0x0, duration=4908.170s, table=1, n_packets=25, n_bytes=1898,
priority=30,ip,metadata=0x2,in_port=s0,dl_dst=00:00:00:00:02:05,nw_dst=10.0.0.2
actions=mod_dl_dst:00:00:00:00:03:01,resubmit(,2)
 cookie=0x0, duration=4908.133s, table=1, n_packets=2, n_bytes=196,
priority=20,ip,metadata=0x2,in_port="br-int-patch",dl_dst=00:00:00:00:04:01,nw_dst=10.100.0.2
actions=dec_ttl,mod_dl_src:00:00:00:00:02:05,resubmit(,2)
 cookie=0x0, duration=4908.103s, table=1, n_packets=2, n_bytes=196,
priority=30,ip,metadata=0x1,in_port="br-int-patch",dl_dst=00:00:00:00:02:04,nw_dst=10.100.0.2
actions=mod_dl_dst:00:00:00:33:00:02,resubmit(,2)
 cookie=0x0, duration=4908.337s, table=1, n_packets=7, n_bytes=294,
priority=10,arp,in_port=s0,arp_tpa=10.100.0.1,arp_op=1
actions=move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],mod_dl_src:00:00:00:33:00:01,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],load:0x330001->NXM_NX_ARP_SHA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],load:0xa640001->NXM_OF_ARP_SPA[],resubmit(,2)
 cookie=0x0, duration=4908.240s, table=1, n_packets=1, n_bytes=60,
priority=10,arp,in_port="br-int-patch",arp_tpa=10.0.0.3,arp_op=1
actions=move:NXM_OF_ETH_SRC[]->NXM_OF_ETH_DST[],mod_dl_src:00:00:00:00:04:01,load:0x2->NXM_OF_ARP_OP[],move:NXM_NX_ARP_SHA[]->NXM_NX_ARP_THA[],load:0x401->NXM_NX_ARP_SHA[],move:NXM_OF_ARP_SPA[]->NXM_OF_ARP_TPA[],load:0xa03->NXM_OF_ARP_SPA[],resubmit(,2)
 cookie=0x0, duration=4908.