On 32-bit builds a pointer is only 4 bytes rather than 8, so copying
32 pointers requires only half the number of AVX-512 load/store
operations.

Fixes: 5bf87b45b2c8 ("net/idpf: add AVX512 data path for single queue model")
Cc: sta...@dpdk.org

Signed-off-by: Bruce Richardson <bruce.richard...@intel.com>
---
 drivers/common/idpf/idpf_common_rxtx_avx512.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/common/idpf/idpf_common_rxtx_avx512.c b/drivers/common/idpf/idpf_common_rxtx_avx512.c
index 3b5e124ec8..b8450b03ae 100644
--- a/drivers/common/idpf/idpf_common_rxtx_avx512.c
+++ b/drivers/common/idpf/idpf_common_rxtx_avx512.c
@@ -1043,6 +1043,7 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
                uint32_t copied = 0;
                /* n is multiple of 32 */
                while (copied < n) {
+#ifdef RTE_ARCH_64
                        const __m512i a = _mm512_loadu_si512(&txep[copied]);
                        const __m512i b = _mm512_loadu_si512(&txep[copied + 8]);
                        const __m512i c = _mm512_loadu_si512(&txep[copied + 
16]);
@@ -1052,6 +1053,12 @@ idpf_tx_singleq_free_bufs_avx512(struct idpf_tx_queue *txq)
                        _mm512_storeu_si512(&cache_objs[copied + 8], b);
                        _mm512_storeu_si512(&cache_objs[copied + 16], c);
                        _mm512_storeu_si512(&cache_objs[copied + 24], d);
+#else
+                       const __m512i a = _mm512_loadu_si512(&txep[copied]);
+                       const __m512i b = _mm512_loadu_si512(&txep[copied + 
16]);
+                       _mm512_storeu_si512(&cache_objs[copied], a);
+                       _mm512_storeu_si512(&cache_objs[copied + 16], b);
+#endif
                        copied += 32;
                }
                cache->len += n;
-- 
2.43.0

Reply via email to