Refcount atomic inc/dec operations are frequent, and their idiom does not require seq_cst ordering. So, to get better performance, it is worth adopting the _relaxed memory model rather than _seq_cst for them.
We resort to gcc builtins. If gcc supports the C11 memory model, the __atomic_* builtins are used; otherwise, the __sync_* builtins. Signed-off-by: Liu Ping Fan <pingf...@linux.vnet.ibm.com> --- include/qemu/atomic.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 0aa8913..1f474b7 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -183,8 +183,15 @@ #endif /* Provide shorter names for GCC atomic builtins. */ +#ifndef _GLIBCXX_ATOMIC_BUILTINS +/* close to C11 memory_order_seq_cst */ #define atomic_fetch_inc(ptr) __sync_fetch_and_add(ptr, 1) #define atomic_fetch_dec(ptr) __sync_fetch_and_add(ptr, -1) +#else +/* C11 memory_order_relaxed */ +#define atomic_fetch_inc(ptr) __atomic_fetch_add(ptr, 1, __ATOMIC_RELAXED) +#define atomic_fetch_dec(ptr) __atomic_fetch_add(ptr, -1, __ATOMIC_RELAXED) +#endif #define atomic_fetch_add __sync_fetch_and_add #define atomic_fetch_sub __sync_fetch_and_sub #define atomic_fetch_and __sync_fetch_and_and @@ -192,8 +199,15 @@ #define atomic_cmpxchg __sync_val_compare_and_swap /* And even shorter names that return void. */ +#ifndef _GLIBCXX_ATOMIC_BUILTINS +/* close to C11 memory_order_seq_cst */ #define atomic_inc(ptr) ((void) __sync_fetch_and_add(ptr, 1)) #define atomic_dec(ptr) ((void) __sync_fetch_and_add(ptr, -1)) +#else +/* C11 memory_order_relaxed */ +#define atomic_inc(ptr) ((void) __atomic_fetch_add(ptr, 1, __ATOMIC_RELAXED)) +#define atomic_dec(ptr) ((void) __atomic_fetch_add(ptr, -1, __ATOMIC_RELAXED)) +#endif #define atomic_add(ptr, n) ((void) __sync_fetch_and_add(ptr, n)) #define atomic_sub(ptr, n) ((void) __sync_fetch_and_sub(ptr, n)) #define atomic_and(ptr, n) ((void) __sync_fetch_and_and(ptr, n)) -- 1.8.1.4