This patch optimizes packet receive performance on the ARM platform. On ARM64 it reads the RX CQE status word with a load-acquire and cache-line aligns struct hinic_rq_cqe. Signed-off-by: Xiaoyun wang <cloud.wangxiao...@huawei.com> --- drivers/net/hinic/hinic_pmd_rx.c | 17 +++++++++++++++++ drivers/net/hinic/hinic_pmd_rx.h | 11 +++++++++++ 2 files changed, 28 insertions(+)
diff --git a/drivers/net/hinic/hinic_pmd_rx.c b/drivers/net/hinic/hinic_pmd_rx.c
index 37b4f5c..94071ee 100644
--- a/drivers/net/hinic/hinic_pmd_rx.c
+++ b/drivers/net/hinic/hinic_pmd_rx.c
@@ -950,6 +950,19 @@ void hinic_rx_alloc_pkts(struct hinic_rxq *rxq)
 	}
 }
 
+#if defined(__ARM64_NEON__)
+/*
+ * Acquire-load the 32-bit CQE status word at addr, so that reads issued
+ * after this call are not reordered ahead of the status load.
+ */
+static inline uint32_t __attribute__((always_inline))
+hinic_read_cqe_status(uintptr_t addr)
+{
+	return __atomic_load_n((volatile uint32_t *)addr,
+			       __ATOMIC_ACQUIRE);
+}
+#endif
+
 u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
 {
 	struct rte_mbuf *rxm;
@@ -972,7 +985,11 @@ u16 hinic_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, u16 nb_pkts)
 	while (pkts < nb_pkts) {
 		/* 2. current ci is done */
 		rx_cqe = &rxq->rx_cqe[sw_ci];
+#if defined(__ARM64_NEON__)
+		status = hinic_read_cqe_status((uintptr_t)&rxq->rx_cqe[sw_ci]);
+#else /* every other target keeps the plain load */
 		status = rx_cqe->status;
+#endif /* __ARM64_NEON__ */
 		if (!HINIC_GET_RX_DONE_BE(status))
 			break;
 
diff --git a/drivers/net/hinic/hinic_pmd_rx.h b/drivers/net/hinic/hinic_pmd_rx.h
index fe2735b..fa27e91 100644
--- a/drivers/net/hinic/hinic_pmd_rx.h
+++ b/drivers/net/hinic/hinic_pmd_rx.h
@@ -28,6 +28,7 @@ struct hinic_rq_ctrl {
 	u32 ctrl_fmt;
 };
 
+#if !defined(__ARM64_NEON__)
 struct hinic_rq_cqe {
 	u32 status;
 	u32 vlan_len;
@@ -36,6 +37,16 @@ struct hinic_rq_cqe {
 
 	u32 rsvd[4];
 };
+#else /* __ARM64_NEON__: same members, cache-line aligned */
+struct hinic_rq_cqe {
+	u32 status;
+	u32 vlan_len;
+	u32 offload_type;
+	u32 rss_hash;
+
+	u32 rsvd[4];
+} __rte_cache_aligned;
+#endif /* !__ARM64_NEON__ */
 
 struct hinic_rq_cqe_sect {
 	struct hinic_sge sge;
-- 
1.8.3.1