On Wed, Jan 29, 2025 at 05:48:54PM +0530, Shaiq Wani wrote: > In case some CPUs don't support AVX512. Enable AVX2 for them to > get better per-core performance. > > The single queue model processes all packets in order while > the split queue model separates packet data and metadata into > different queues for parallel processing and improved performance. > > Signed-off-by: Shaiq Wani <shaiq.w...@intel.com> > --- > drivers/common/idpf/idpf_common_device.h | 1 + > drivers/common/idpf/idpf_common_rxtx.h | 4 + > drivers/common/idpf/idpf_common_rxtx_avx2.c | 485 ++++++++++++++++++++ > drivers/common/idpf/meson.build | 9 + > drivers/common/idpf/version.map | 1 + > drivers/net/idpf/idpf_rxtx.c | 11 +
This patch needs a rebase onto next-net-intel to update this final path. See also comments inline below. Regards, /Bruce > 6 files changed, 511 insertions(+) > create mode 100644 drivers/common/idpf/idpf_common_rxtx_avx2.c > > diff --git a/drivers/common/idpf/idpf_common_device.h > b/drivers/common/idpf/idpf_common_device.h > index bfa927a5ff..734be1c88a 100644 > --- a/drivers/common/idpf/idpf_common_device.h > +++ b/drivers/common/idpf/idpf_common_device.h > @@ -123,6 +123,7 @@ struct idpf_vport { > > bool rx_vec_allowed; > bool tx_vec_allowed; > + bool rx_use_avx2; > bool rx_use_avx512; > bool tx_use_avx512; > > diff --git a/drivers/common/idpf/idpf_common_rxtx.h > b/drivers/common/idpf/idpf_common_rxtx.h > index eeeeed12e2..f50cf5ef46 100644 > --- a/drivers/common/idpf/idpf_common_rxtx.h > +++ b/drivers/common/idpf/idpf_common_rxtx.h > @@ -302,5 +302,9 @@ uint16_t idpf_dp_splitq_xmit_pkts_avx512(void *tx_queue, > struct rte_mbuf **tx_pk > __rte_internal > uint16_t idpf_dp_singleq_recv_scatter_pkts(void *rx_queue, struct rte_mbuf > **rx_pkts, > uint16_t nb_pkts); > +__rte_internal > +uint16_t idpf_dp_singleq_recv_pkts_avx2(void *rx_queue, > + struct rte_mbuf **rx_pkts, > + uint16_t nb_pkts); > > #endif /* _IDPF_COMMON_RXTX_H_ */ > diff --git a/drivers/common/idpf/idpf_common_rxtx_avx2.c > b/drivers/common/idpf/idpf_common_rxtx_avx2.c > new file mode 100644 > index 0000000000..de76f01ff8 > --- /dev/null > +++ b/drivers/common/idpf/idpf_common_rxtx_avx2.c > @@ -0,0 +1,485 @@ > +/* SPDX-License-Identifier: BSD-3-Clause > + * Copyright(c) 2023 Intel Corporation > + */ > + > +#include <rte_vect.h> > + > +#include "idpf_common_rxtx.h" > +#include "idpf_common_device.h" > + <snip> > +idpf_dp_singleq_recv_pkts_avx2(void *rx_queue, struct rte_mbuf **rx_pkts, > uint16_t nb_pkts) > +{ > + return _idpf_singleq_recv_raw_pkts_vec_avx2(rx_queue, rx_pkts, nb_pkts); > +} I assume the presence of this wrapper function is to allow future support for scattered packets, as with other 
drivers? > diff --git a/drivers/common/idpf/meson.build b/drivers/common/idpf/meson.build > index 46fd45c03b..fe54fdcd87 100644 > --- a/drivers/common/idpf/meson.build > +++ b/drivers/common/idpf/meson.build > @@ -16,6 +16,15 @@ sources = files( > ) > > if arch_subdir == 'x86' > + sources += files('idpf_common_rxtx.c') This is already added to sources in the block above. It's also not x86 specific, I believe. > + cflags += ['-DCC_AVX2_SUPPORT'] Strictly speaking, we don't need this, as we always have AVX2 support in x86 builds. The code can just check for the RTE_ARCH_X86 macro. > + idpf_avx2_lib = static_library('idpf_avx2_lib', > + 'idpf_common_rxtx_avx2.c', > + dependencies: [static_rte_ethdev, static_rte_kvargs, > static_rte_hash], rte_kvargs is already a dependency of the hash and ethdev libraries, so you can drop it from the list to shorten it. > + include_directories: includes, > + c_args: [cflags, '-mavx2']) > + objs += idpf_avx2_lib.extract_objects('idpf_common_rxtx_avx2.c') > + > if cc_has_avx512 > cflags += ['-DCC_AVX512_SUPPORT'] > avx512_args = cflags + cc_avx512_flags > diff --git a/drivers/common/idpf/version.map b/drivers/common/idpf/version.map > index 0729f6b912..22b689f5f5 100644 > --- a/drivers/common/idpf/version.map > +++ b/drivers/common/idpf/version.map > @@ -6,6 +6,7 @@ INTERNAL { > > idpf_dp_prep_pkts; > idpf_dp_singleq_recv_pkts; > + idpf_dp_singleq_recv_pkts_avx2; > idpf_dp_singleq_recv_pkts_avx512; > idpf_dp_singleq_recv_scatter_pkts; > idpf_dp_singleq_xmit_pkts; > diff --git a/drivers/net/idpf/idpf_rxtx.c b/drivers/net/idpf/idpf_rxtx.c > index 858bbefe3b..a8377d3fee 100644 > --- a/drivers/net/idpf/idpf_rxtx.c > +++ b/drivers/net/idpf/idpf_rxtx.c > @@ -776,6 +776,10 @@ idpf_set_rx_function(struct rte_eth_dev *dev) > rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) { > vport->rx_vec_allowed = true; > > + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 && > + rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_256) > + vport->rx_use_avx2 
= true; > + > if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512) > #ifdef CC_AVX512_SUPPORT > if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 > && > @@ -827,6 +831,13 @@ idpf_set_rx_function(struct rte_eth_dev *dev) > return; > } > #endif /* CC_AVX512_SUPPORT */ > + if (vport->rx_use_avx2) { > + PMD_DRV_LOG(NOTICE, > + "Using Single AVX2 Vector Rx (port > %d).", > + dev->data->port_id); > + dev->rx_pkt_burst = > idpf_dp_singleq_recv_pkts_avx2; > + return; > + } > } > > if (dev->data->scattered_rx) { > -- > 2.34.1 >