Updated the odp_sync_stores() specification and added odp_sync_loads() to pair with it. Both are implemented with GCC's __atomic_thread_fence().
Signed-off-by: Petri Savolainen <petri.savolai...@nokia.com> --- include/odp/api/sync.h | 53 +++++++++++++++---------------- platform/linux-generic/include/odp/sync.h | 18 +++++++++++ 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h index 6477e74..8456f68 100644 --- a/include/odp/api/sync.h +++ b/include/odp/api/sync.h @@ -23,37 +23,34 @@ extern "C" { */ /** - * Synchronise stores + * Synchronize stores * - * Ensures that all CPU store operations that precede the odp_sync_stores() - * call are globally visible before any store operation that follows it. + * This call implements a write memory barrier between threads. It ensures that + * all (non-atomic or relaxed atomic) stores (from the calling thread) that + * precede this call are globally visible before any store operation that + * follows it. It prevents stores moving from before the call to after it. + * + * ODP synchronization mechanisms (e.g. barrier, unlocks, queue enqueues) + * include write barrier, so this call is not needed when using those. + * + * @see odp_sync_loads() */ -static inline void odp_sync_stores(void) -{ -#if defined __x86_64__ || defined __i386__ - - __asm__ __volatile__ ("sfence\n" : : : "memory"); - -#elif defined(__arm__) -#if __ARM_ARCH == 6 - __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory"); -#elif __ARM_ARCH >= 7 || defined __aarch64__ - - __asm__ __volatile__ ("dmb st" : : : "memory"); -#else - __asm__ __volatile__ ("" : : : "memory"); -#endif - -#elif defined __OCTEON__ - - __asm__ __volatile__ ("syncws\n" : : : "memory"); - -#else - __sync_synchronize(); -#endif -} +void odp_sync_stores(void); +/** + * Synchronize loads + * + * This call implements a read memory barrier. It ensures that all (non-atomic + * or relaxed atomic) loads that precede this call happen before any load + * operation that follows it. It prevents loads moving from after the call to + * before it. 
+ * + * ODP synchronization mechanisms (e.g. barrier, locks, queue dequeues) + * include read barrier, so this call is not needed when using those. + * + * @see odp_sync_stores() + */ +void odp_sync_loads(void); /** * @} diff --git a/platform/linux-generic/include/odp/sync.h b/platform/linux-generic/include/odp/sync.h index bc73083..09b3939 100644 --- a/platform/linux-generic/include/odp/sync.h +++ b/platform/linux-generic/include/odp/sync.h @@ -17,6 +17,24 @@ extern "C" { #endif +/** @ingroup odp_barrier + * @{ + */ + +static inline void odp_sync_stores(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + +static inline void odp_sync_loads(void) +{ + __atomic_thread_fence(__ATOMIC_ACQUIRE); +} + +/** + * @} + */ + #include <odp/api/sync.h> #ifdef __cplusplus -- 2.6.2 _______________________________________________ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp