On 32-bit builds a pointer is only 4 bytes rather than 8, so a 64-byte AVX-512 register holds 16 pointers instead of 8. Copying 32 pointers therefore requires only two AVX-512 load/store pairs rather than the four used on 64-bit builds.

Fixes: 5bf87b45b2c8 ("net/idpf: add AVX512 data path for single queue model")
Cc: sta...@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 3b5e124ec8..b8450b03ae 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1043,6 +1043,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 		uint32_t copied = 0;
 		/* n is multiple of 32 */
 		while (copied < n) {
+#ifdef RTE_ARCH_64
 			const __m512i a = _mm512_loadu_si512(&txep[copied]);
 			const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
 			const __m512i c = _mm512_loadu_si512(&txep[copied + 16]);
@@ -1052,6 +1053,12 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
 			_mm512_storeu_si512(&cache_objs[copied + 8], b);
 			_mm512_storeu_si512(&cache_objs[copied + 16], c);
 			_mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+			const __m512i a = _mm512_loadu_si512(&txep[copied]);
+			const __m512i b = _mm512_loadu_si512(&txep[copied + 16]);
+			_mm512_storeu_si512(&cache_objs[copied], a);
+			_mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
 			copied += 32;
 		}
 		cache->len += n;
-- 
2.43.0