Add support of AVX512 vector data path for single queue model.

Signed-off-by: Wenjun Wu <wenjun1...@intel.com>
Signed-off-by: Mingxia Liu <mingxia....@intel.com>
---
 doc/guides/nics/cpfl.rst                | 24 +++++-
 drivers/net/cpfl/cpfl_ethdev.c          |  3 +-
 drivers/net/cpfl/cpfl_rxtx.c            | 85 +++++++++++++++++++++
 drivers/net/cpfl/cpfl_rxtx_vec_common.h | 99 +++++++++++++++++++++++++
 drivers/net/cpfl/meson.build            | 25 ++++++-
 5 files changed, 233 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/cpfl/cpfl_rxtx_vec_common.h

diff --git a/doc/guides/nics/cpfl.rst b/doc/guides/nics/cpfl.rst
index 064c69ba7d..489a2d6153 100644
--- a/doc/guides/nics/cpfl.rst
+++ b/doc/guides/nics/cpfl.rst
@@ -63,4 +63,26 @@ Runtime Config Options
 Driver compilation and testing
 ------------------------------
 
-Refer to the document :doc:`build_and_test` for details.
\ No newline at end of file
+Refer to the document :doc:`build_and_test` for details.
+
+Features
+--------
+
+Vector PMD
+~~~~~~~~~~
+
+Vector paths for Rx and Tx are selected automatically.
+The paths are chosen based on 2 conditions:
+
+- ``CPU``
+
+  On the x86 platform, the driver checks if the CPU supports AVX512.
+  If the CPU supports AVX512 and EAL argument ``--force-max-simd-bitwidth``
+  is set to 512, AVX512 paths will be chosen.
+
+- ``Offload features``
+
+  The supported HW offload features are described in the document cpfl.ini.
+  A value "P" means the offload feature is not supported by vector path.
+  If any unsupported features are used, the cpfl vector PMD is disabled
+  and the scalar paths are chosen.
diff --git a/drivers/net/cpfl/cpfl_ethdev.c b/drivers/net/cpfl/cpfl_ethdev.c
index f684d7cff5..5fe800f27c 100644
--- a/drivers/net/cpfl/cpfl_ethdev.c
+++ b/drivers/net/cpfl/cpfl_ethdev.c
@@ -111,7 +111,8 @@ cpfl_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info)
                RTE_ETH_TX_OFFLOAD_TCP_CKSUM            |
                RTE_ETH_TX_OFFLOAD_SCTP_CKSUM           |
                RTE_ETH_TX_OFFLOAD_TCP_TSO              |
-               RTE_ETH_TX_OFFLOAD_MULTI_SEGS;
+               RTE_ETH_TX_OFFLOAD_MULTI_SEGS           |
+               RTE_ETH_TX_OFFLOAD_MBUF_FAST_FREE;
 
        dev_info->default_txconf = (struct rte_eth_txconf) {
                .tx_free_thresh = CPFL_DEFAULT_TX_FREE_THRESH,
diff --git a/drivers/net/cpfl/cpfl_rxtx.c b/drivers/net/cpfl/cpfl_rxtx.c
index 0e053f4434..63f474a79b 100644
--- a/drivers/net/cpfl/cpfl_rxtx.c
+++ b/drivers/net/cpfl/cpfl_rxtx.c
@@ -8,6 +8,7 @@
 
 #include "cpfl_ethdev.h"
 #include "cpfl_rxtx.h"
+#include "cpfl_rxtx_vec_common.h"
 
 static uint64_t
 cpfl_rx_offload_convert(uint64_t offload)
@@ -739,22 +740,106 @@ void
 cpfl_set_rx_function(struct rte_eth_dev *dev)
 {
        struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+       struct idpf_rx_queue *rxq;
+       int i;
+
+       if (cpfl_rx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               vport->rx_vec_allowed = true;
+
+               if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                           rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1 &&
+                           rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512DQ))
+                               vport->rx_use_avx512 = true;
+#else
+               PMD_DRV_LOG(NOTICE,
+                           "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+       } else {
+               vport->rx_vec_allowed = false;
+       }
+#endif /* RTE_ARCH_X86 */
+
+#ifdef RTE_ARCH_X86
+       if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
+               dev->rx_pkt_burst = idpf_splitq_recv_pkts;
+       } else {
+               if (vport->rx_vec_allowed) {
+                       /* Fix: iterate Rx queues with nb_rx_queues, not
+                        * nb_tx_queues — indexing rx_queues[] by the Tx queue
+                        * count reads out of bounds (or skips Rx queues) when
+                        * the counts differ.
+                        */
+                       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+                               rxq = dev->data->rx_queues[i];
+                               (void)idpf_singleq_rx_vec_setup(rxq);
+                       }
+#ifdef CC_AVX512_SUPPORT
+                       if (vport->rx_use_avx512) {
+                               dev->rx_pkt_burst = idpf_singleq_recv_pkts_avx512;
+                               return;
+                       }
+#endif /* CC_AVX512_SUPPORT */
+               }
 
+               dev->rx_pkt_burst = idpf_singleq_recv_pkts;
+       }
+#else
        if (vport->rxq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT)
                dev->rx_pkt_burst = idpf_splitq_recv_pkts;
        else
                dev->rx_pkt_burst = idpf_singleq_recv_pkts;
+#endif /* RTE_ARCH_X86 */
 }
 
 void
 cpfl_set_tx_function(struct rte_eth_dev *dev)
 {
        struct idpf_vport *vport = dev->data->dev_private;
+#ifdef RTE_ARCH_X86
+#ifdef CC_AVX512_SUPPORT
+       struct idpf_tx_queue *txq;
+       int i;
+#endif /* CC_AVX512_SUPPORT */
+
+       if (cpfl_tx_vec_dev_check_default(dev) == CPFL_VECTOR_PATH &&
+           rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
+               vport->tx_vec_allowed = true;
+               if (rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_512)
+#ifdef CC_AVX512_SUPPORT
+               {
+                       if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1 &&
+                           rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512BW) == 1)
+                               vport->tx_use_avx512 = true;
+                       if (vport->tx_use_avx512) {
+                               for (i = 0; i < dev->data->nb_tx_queues; i++) {
+                                       txq = dev->data->tx_queues[i];
+                                       idpf_tx_vec_setup_avx512(txq);
+                               }
+                       }
+               }
+#else
+               PMD_DRV_LOG(NOTICE,
+                           "AVX512 is not supported in build env");
+#endif /* CC_AVX512_SUPPORT */
+       } else {
+               vport->tx_vec_allowed = false;
+       }
+#endif /* RTE_ARCH_X86 */
 
        if (vport->txq_model == VIRTCHNL2_QUEUE_MODEL_SPLIT) {
                dev->tx_pkt_burst = idpf_splitq_xmit_pkts;
                dev->tx_pkt_prepare = idpf_prep_pkts;
        } else {
+#ifdef RTE_ARCH_X86
+               if (vport->tx_vec_allowed) {
+#ifdef CC_AVX512_SUPPORT
+                       if (vport->tx_use_avx512) {
+                               dev->tx_pkt_burst = idpf_singleq_xmit_pkts_avx512;
+                               dev->tx_pkt_prepare = idpf_prep_pkts;
+                               return;
+                       }
+#endif /* CC_AVX512_SUPPORT */
+               }
+#endif /* RTE_ARCH_X86 */
                dev->tx_pkt_burst = idpf_singleq_xmit_pkts;
                dev->tx_pkt_prepare = idpf_prep_pkts;
        }
diff --git a/drivers/net/cpfl/cpfl_rxtx_vec_common.h b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
new file mode 100644
index 0000000000..a411cf6a32
--- /dev/null
+++ b/drivers/net/cpfl/cpfl_rxtx_vec_common.h
@@ -0,0 +1,99 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2022 Intel Corporation
+ */
+
+#ifndef _CPFL_RXTX_VEC_COMMON_H_
+#define _CPFL_RXTX_VEC_COMMON_H_
+#include <stdint.h>
+#include <ethdev_driver.h>
+#include <rte_malloc.h>
+
+#include "cpfl_ethdev.h"
+#include "cpfl_rxtx.h"
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+#define CPFL_VECTOR_PATH               0
+#define CPFL_RX_NO_VECTOR_FLAGS (              \
+               RTE_ETH_RX_OFFLOAD_IPV4_CKSUM | \
+               RTE_ETH_RX_OFFLOAD_UDP_CKSUM |  \
+               RTE_ETH_RX_OFFLOAD_TCP_CKSUM |  \
+               RTE_ETH_RX_OFFLOAD_OUTER_IPV4_CKSUM |   \
+               RTE_ETH_RX_OFFLOAD_TIMESTAMP)
+#define CPFL_TX_NO_VECTOR_FLAGS (              \
+               RTE_ETH_TX_OFFLOAD_TCP_TSO |    \
+               RTE_ETH_TX_OFFLOAD_MULTI_SEGS)
+
+static inline int
+cpfl_rx_vec_queue_default(struct idpf_rx_queue *rxq)
+{
+       if (rxq == NULL)
+               return -1;
+
+       if (rte_is_power_of_2(rxq->nb_rx_desc) == 0)
+               return -1;
+
+       if (rxq->rx_free_thresh < IDPF_VPMD_RX_MAX_BURST)
+               return -1;
+
+       if ((rxq->nb_rx_desc % rxq->rx_free_thresh) != 0)
+               return -1;
+
+       if ((rxq->offloads & CPFL_RX_NO_VECTOR_FLAGS) != 0)
+               return -1;
+
+       return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_tx_vec_queue_default(struct idpf_tx_queue *txq)
+{
+       if (txq == NULL)
+               return -1;
+
+       if (txq->rs_thresh < IDPF_VPMD_TX_MAX_BURST ||
+           (txq->rs_thresh & 3) != 0)
+               return -1;
+
+       if ((txq->offloads & CPFL_TX_NO_VECTOR_FLAGS) != 0)
+               return -1;
+
+       return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_rx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+       struct idpf_rx_queue *rxq;
+       int i, ret = 0;
+
+       for (i = 0; i < dev->data->nb_rx_queues; i++) {
+               rxq = dev->data->rx_queues[i];
+               ret = cpfl_rx_vec_queue_default(rxq);
+               if (ret < 0)
+                       return -1;
+       }
+
+       return CPFL_VECTOR_PATH;
+}
+
+static inline int
+cpfl_tx_vec_dev_check_default(struct rte_eth_dev *dev)
+{
+       int i;
+       struct idpf_tx_queue *txq;
+       int ret = 0;
+
+       for (i = 0; i < dev->data->nb_tx_queues; i++) {
+               txq = dev->data->tx_queues[i];
+               ret = cpfl_tx_vec_queue_default(txq);
+               if (ret < 0)
+                       return -1;
+       }
+
+       return CPFL_VECTOR_PATH;
+}
+
+#endif /*_CPFL_RXTX_VEC_COMMON_H_*/
diff --git a/drivers/net/cpfl/meson.build b/drivers/net/cpfl/meson.build
index 3ccee15703..40ed8dbb7b 100644
--- a/drivers/net/cpfl/meson.build
+++ b/drivers/net/cpfl/meson.build
@@ -7,9 +7,32 @@ if is_windows
     subdir_done()
 endif
 
+if dpdk_conf.get('RTE_IOVA_AS_PA') == 0
+    build = false
+    reason = 'driver does not support disabling IOVA as PA mode'
+    subdir_done()
+endif
+
 deps += ['common_idpf']
 
 sources = files(
         'cpfl_ethdev.c',
         'cpfl_rxtx.c',
-)
\ No newline at end of file
+)
+
+if arch_subdir == 'x86'
+    cpfl_avx512_cpu_support = (
+        cc.get_define('__AVX512F__', args: machine_args) != '' and
+        cc.get_define('__AVX512BW__', args: machine_args) != ''
+    )
+
+    cpfl_avx512_cc_support = (
+        not machine_args.contains('-mno-avx512f') and
+        cc.has_argument('-mavx512f') and
+        cc.has_argument('-mavx512bw')
+    )
+
+    if cpfl_avx512_cpu_support or cpfl_avx512_cc_support
+        cflags += ['-DCC_AVX512_SUPPORT']
+    endif
+endif
-- 
2.25.1

Reply via email to