Instead of polling for the tail to be updated, use the WFE (wait-for-event) instruction. A 50%~70% performance gain was measured by running ring_perf_autotest on ThunderX2.
Signed-off-by: Gavin Hu <gavin...@arm.com> Reviewed-by: Ruifeng Wang <ruifeng.w...@arm.com> Reviewed-by: Steve Capper <steve.cap...@arm.com> Reviewed-by: Ola Liljedahl <ola.liljed...@arm.com> Reviewed-by: Honnappa Nagarahalli <honnappa.nagaraha...@arm.com> --- lib/librte_ring/rte_ring_c11_mem.h | 5 +++-- lib/librte_ring/rte_ring_generic.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lib/librte_ring/rte_ring_c11_mem.h b/lib/librte_ring/rte_ring_c11_mem.h index 0fb73a3..f1de79c 100644 --- a/lib/librte_ring/rte_ring_c11_mem.h +++ b/lib/librte_ring/rte_ring_c11_mem.h @@ -2,6 +2,7 @@ * * Copyright (c) 2017,2018 HXT-semitech Corporation. * Copyright (c) 2007-2009 Kip Macy km...@freebsd.org + * Copyright (c) 2019 Arm Limited * All rights reserved. * Derived from FreeBSD's bufring.h * Used as BSD-3 Licensed with permission from Kip Macy. @@ -21,8 +22,8 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, * we need to wait for them to complete */ if (!single) - while (unlikely(ht->tail != old_val)) - rte_pause(); + if (unlikely(ht->tail != old_val)) + rte_wait_until_equal_relaxed(&ht->tail, old_val); __atomic_store_n(&ht->tail, new_val, __ATOMIC_RELEASE); } diff --git a/lib/librte_ring/rte_ring_generic.h b/lib/librte_ring/rte_ring_generic.h index 953cdbb..bb0dce0 100644 --- a/lib/librte_ring/rte_ring_generic.h +++ b/lib/librte_ring/rte_ring_generic.h @@ -23,8 +23,8 @@ update_tail(struct rte_ring_headtail *ht, uint32_t old_val, uint32_t new_val, * we need to wait for them to complete */ if (!single) - while (unlikely(ht->tail != old_val)) - rte_pause(); + if (unlikely(ht->tail != old_val)) + rte_wait_until_equal_relaxed(&ht->tail, old_val); ht->tail = new_val; } -- 2.7.4