On 3/22/19 3:58 AM, Wenzhuo Lu wrote:
Signed-off-by: Wenzhuo Lu <wenzhuo...@intel.com>
---
drivers/net/ice/Makefile | 19 ++
drivers/net/ice/ice_rxtx.c | 16 +-
drivers/net/ice/ice_rxtx.h | 2 +
drivers/net/ice/ice_rxtx_vec_avx2.c | 622 ++++++++++++++++++++++++++++++++++++
drivers/net/ice/meson.build | 15 +
5 files changed, 671 insertions(+), 3 deletions(-)
create mode 100644 drivers/net/ice/ice_rxtx_vec_avx2.c
diff --git a/drivers/net/ice/Makefile b/drivers/net/ice/Makefile
index 92594bb..5ba59f4 100644
--- a/drivers/net/ice/Makefile
+++ b/drivers/net/ice/Makefile
@@ -58,4 +58,23 @@ ifeq ($(CONFIG_RTE_ARCH_X86), y)
SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_sse.c
endif
+ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2)
+ CC_AVX2_SUPPORT=1
+else
+ CC_AVX2_SUPPORT=\
+ $(shell $(CC) -march=core-avx2 -dM -E - </dev/null 2>&1 | \
+ grep -q AVX2 && echo 1)
+ ifeq ($(CC_AVX2_SUPPORT), 1)
+ ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+ CFLAGS_ice_rxtx_vec_avx2.o += -march=core-avx2
+ else
+ CFLAGS_ice_rxtx_vec_avx2.o += -mavx2
+ endif
+ endif
+endif
+
+ifeq ($(CC_AVX2_SUPPORT), 1)
+ SRCS-$(CONFIG_RTE_LIBRTE_ICE_PMD) += ice_rxtx_vec_avx2.c
+endif
+
include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/ice/ice_rxtx.c b/drivers/net/ice/ice_rxtx.c
index f9ecffa..6191f34 100644
--- a/drivers/net/ice/ice_rxtx.c
+++ b/drivers/net/ice/ice_rxtx.c
@@ -1494,7 +1494,8 @@
#ifdef RTE_ARCH_X86
if (dev->rx_pkt_burst == ice_recv_pkts_vec ||
- dev->rx_pkt_burst == ice_recv_scattered_pkts_vec)
+ dev->rx_pkt_burst == ice_recv_scattered_pkts_vec ||
+ dev->rx_pkt_burst == ice_recv_pkts_vec_avx2)
return ptypes;
#endif
@@ -2236,21 +2237,30 @@ void __attribute__((cold))
#ifdef RTE_ARCH_X86
struct ice_rx_queue *rxq;
int i;
+ bool use_avx2 = false;
if (!ice_rx_vec_dev_check(dev)) {
for (i = 0; i < dev->data->nb_rx_queues; i++) {
rxq = dev->data->rx_queues[i];
(void)ice_rxq_vec_setup(rxq);
}
+
+ if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2) == 1 ||
+ rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F) == 1)
+ use_avx2 = true;
+
if (dev->data->scattered_rx) {
PMD_DRV_LOG(DEBUG,
"Using Vector Scattered Rx (port %d).",
dev->data->port_id);
dev->rx_pkt_burst = ice_recv_scattered_pkts_vec;
} else {
- PMD_DRV_LOG(DEBUG, "Using Vector Rx (port %d).",
+ PMD_DRV_LOG(DEBUG, "Using %sVector Rx (port %d).",
+ use_avx2 ? "avx2 " : "",
dev->data->port_id);
- dev->rx_pkt_burst = ice_recv_pkts_vec;
+ dev->rx_pkt_burst = use_avx2 ?
+ ice_recv_pkts_vec_avx2 :
+ ice_recv_pkts_vec;
}
return;
diff --git a/drivers/net/ice/ice_rxtx.h b/drivers/net/ice/ice_rxtx.h
index 1dde4e7..d1c9b92 100644
--- a/drivers/net/ice/ice_rxtx.h
+++ b/drivers/net/ice/ice_rxtx.h
@@ -179,4 +179,6 @@ uint16_t ice_recv_scattered_pkts_vec(void *rx_queue, struct
rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t ice_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
+uint16_t ice_recv_pkts_vec_avx2(void *rx_queue, struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
#endif /* _ICE_RXTX_H_ */
diff --git a/drivers/net/ice/ice_rxtx_vec_avx2.c
b/drivers/net/ice/ice_rxtx_vec_avx2.c
new file mode 100644
index 0000000..763fa9f
--- /dev/null
+++ b/drivers/net/ice/ice_rxtx_vec_avx2.c
@@ -0,0 +1,622 @@
+/* SPDX-License-Identifier: BSD-3-Clause
+ * Copyright(c) 2019 Intel Corporation
+ */
+
+#include "ice_rxtx_vec_common.h"
+
+#include <x86intrin.h>
+
+#ifndef __INTEL_COMPILER
+#pragma GCC diagnostic ignored "-Wcast-qual"
+#endif
+
+static inline void
+ice_rxq_rearm(struct ice_rx_queue *rxq)
+{
+ int i;
+ uint16_t rx_id;
+ volatile union ice_rx_desc *rxdp;
+ struct ice_rx_entry *rxep = &rxq->sw_ring[rxq->rxrearm_start];
+
+ rxdp = rxq->rx_ring + rxq->rxrearm_start;
+
+ /* Pull 'n' more MBUFs into the software ring */
+ if (rte_mempool_get_bulk(rxq->mp,
+ (void *)rxep,
+ ICE_RXQ_REARM_THRESH) < 0) {
+ if (rxq->rxrearm_nb + ICE_RXQ_REARM_THRESH >=
+ rxq->nb_rx_desc) {
+ __m128i dma_addr0;
+
+ dma_addr0 = _mm_setzero_si128();
+ for (i = 0; i < ICE_DESCS_PER_LOOP; i++) {
+ rxep[i].mbuf = &rxq->fake_mbuf;
+ _mm_store_si128((__m128i *)&rxdp[i].read,
+ dma_addr0);
+ }
+ }
+ rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed +=
+ ICE_RXQ_REARM_THRESH;
+ return;
+ }
+
+#ifndef RTE_LIBRTE_ICE_16BYTE_RX_DESC
I see the same is done for other Intel NICs, but I wonder: what would be the
performance cost of making it dynamic, if any?
Making it dynamic (as a devarg, for instance) would make it possible to
change the value when the user is running DPDK from a distro. It would
also help improve test coverage.
By the way, how do you select this option with the meson build system?