On weakly ordered memory architectures such as POWER, rte_smp_wmb() and rte_smp_rmb() must issue CPU barrier instructions; a compiler barrier alone is not sufficient. This patch fixes that. In addition, to improve performance on PPC64, the lightweight sync instruction (lwsync) is used instead of sync.
Signed-off-by: Chao Zhu <chaozhu at linux.vnet.ibm.com>
---
 .../common/include/arch/ppc_64/rte_atomic.h | 12 ++++++++++--
 1 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
index feae486..924e894 100644
--- a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
+++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h
@@ -62,7 +62,11 @@ extern "C" {
  * Guarantees that the STORE operations generated before the barrier
  * occur before the STORE operations generated after.
  */
+#ifdef RTE_ARCH_64
+#define rte_wmb() {asm volatile("lwsync" : : : "memory"); }
+#else
 #define rte_wmb() {asm volatile("sync" : : : "memory"); }
+#endif
 
 /**
  * Read memory barrier.
@@ -70,13 +74,17 @@ extern "C" {
  * Guarantees that the LOAD operations generated before the barrier
  * occur before the LOAD operations generated after.
  */
+#ifdef RTE_ARCH_64
+#define rte_rmb() {asm volatile("lwsync" : : : "memory"); }
+#else
 #define rte_rmb() {asm volatile("sync" : : : "memory"); }
+#endif
 
 #define rte_smp_mb() rte_mb()
 
-#define rte_smp_wmb() rte_compiler_barrier()
+#define rte_smp_wmb() rte_wmb()
 
-#define rte_smp_rmb() rte_compiler_barrier()
+#define rte_smp_rmb() rte_rmb()
 
 /*------------------------- 16 bit atomic operations -------------------------*/
 /* To be compatible with Power7, use GCC built-in functions for 16 bit
--
1.7.1