The packet header is prefetched before packet processing for better
memory access performance. Because the L2 header is updated by l3fwd,
prefetching with a store hint puts the cache line into the proper
state up front and reduces cache maintenance overhead.

With this change, a 12.9% performance uplift was measured on the N1SDP
platform with an MLX5 NIC.

Suggested-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>

Signed-off-by: Ruifeng Wang <ruifeng.w...@arm.com>
Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com>
---
 examples/l3fwd/l3fwd_lpm_neon.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/examples/l3fwd/l3fwd_lpm_neon.h b/examples/l3fwd/l3fwd_lpm_neon.h
index d6c0ba64a..ae8840694 100644
--- a/examples/l3fwd/l3fwd_lpm_neon.h
+++ b/examples/l3fwd/l3fwd_lpm_neon.h
@@ -97,13 +97,13 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
 
        if (k) {
                for (i = 0; i < FWDSTEP; i++) {
-                       rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[i],
+                       rte_prefetch0_write(rte_pktmbuf_mtod(pkts_burst[i],
                                                struct rte_ether_hdr *) + 1);
                }
 
                for (j = 0; j != k - FWDSTEP; j += FWDSTEP) {
                        for (i = 0; i < FWDSTEP; i++) {
-                               rte_prefetch0(rte_pktmbuf_mtod(
+                               rte_prefetch0_write(rte_pktmbuf_mtod(
                                                pkts_burst[j + i + FWDSTEP],
                                                struct rte_ether_hdr *) + 1);
                        }
@@ -124,17 +124,17 @@ l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
                /* Prefetch last up to 3 packets one by one */
                switch (m) {
                case 3:
-                       rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
+                       rte_prefetch0_write(rte_pktmbuf_mtod(pkts_burst[j],
                                                struct rte_ether_hdr *) + 1);
                        j++;
                        /* fallthrough */
                case 2:
-                       rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
+                       rte_prefetch0_write(rte_pktmbuf_mtod(pkts_burst[j],
                                                struct rte_ether_hdr *) + 1);
                        j++;
                        /* fallthrough */
                case 1:
-                       rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j],
+                       rte_prefetch0_write(rte_pktmbuf_mtod(pkts_burst[j],
                                                struct rte_ether_hdr *) + 1);
                        j++;
                }
-- 
2.25.1

Reply via email to