Ping. Waiting to be merged. From: EXT Bill Fischofer [mailto:bill.fischo...@linaro.org] Sent: Sunday, December 13, 2015 12:44 AM To: Savolainen, Petri (Nokia - FI/Espoo) Cc: LNG ODP Mailman List Subject: Re: [lng-odp] [API-NEXT PATCH 1/2] api: barrier: added memory barriers
On Fri, Dec 11, 2015 at 5:30 AM, Petri Savolainen <petri.savolai...@nokia.com<mailto:petri.savolai...@nokia.com>> wrote: Added new memory barriers. These follow the C11 release / acquire specification and replace odp_sync_stores(). Used GCC __atomic_thread_fence to implement all three barriers. Signed-off-by: Petri Savolainen <petri.savolai...@nokia.com<mailto:petri.savolai...@nokia.com>> Reviewed-by: Bill Fischofer <bill.fischo...@linaro.org<mailto:bill.fischo...@linaro.org>> --- include/odp/api/barrier.h | 11 ++++- include/odp/api/sync.h | 82 ++++++++++++++++++++----------- platform/linux-generic/include/odp/sync.h | 28 +++++++++++ 3 files changed, 90 insertions(+), 31 deletions(-) diff --git a/include/odp/api/barrier.h b/include/odp/api/barrier.h index 8ca2647..823eae6 100644 --- a/include/odp/api/barrier.h +++ b/include/odp/api/barrier.h @@ -18,8 +18,15 @@ extern "C" { #endif -/** @defgroup odp_barrier ODP BARRIER - * Thread excution and memory ordering barriers. +/** + * @defgroup odp_barrier ODP BARRIER + * Thread execution and memory ordering barriers. + * + * @details + * <b> Thread execution barrier (odp_barrier_t) </b> + * + * Thread execution barrier synchronizes a group of threads to wait on the + * barrier until the entire group has reached the barrier. * @{ */ diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h index 6477e74..c6f790c 100644 --- a/include/odp/api/sync.h +++ b/include/odp/api/sync.h @@ -8,7 +8,7 @@ /** * @file * - * ODP synchronisation + * ODP memory barriers */ #ifndef ODP_API_SYNC_H_ @@ -18,42 +18,66 @@ extern "C" { #endif -/** @addtogroup odp_barrier +/** + * @addtogroup odp_barrier + * @details + * <b> Memory barriers </b> + * + * Memory barriers enforce ordering of memory load and store operations + * specified before and after the barrier. These barriers may affect both + * compiler optimizations and CPU out-of-order execution. All ODP + * synchronization mechanisms (e.g. 
execution barriers, locks, queues, etc.) + * include all necessary memory barriers, so these calls are not needed when + * using those. ODP atomic operations also have memory-ordered versions. These + * explicit barriers may be needed when thread synchronization is based on + * a non-ODP defined mechanism. Depending on the HW platform, heavy usage of + * memory barriers may cause significant performance degradation. + * * @{ */ /** - * Synchronise stores + * Memory barrier for release operations * - * Ensures that all CPU store operations that precede the odp_sync_stores() - * call are globally visible before any store operation that follows it. + * This memory barrier has release semantics. It synchronizes with a pairing + * barrier for acquire operations. The releasing and acquiring threads + * synchronize through shared memory. The releasing thread must call this + * barrier before signaling the acquiring thread. After the acquiring thread + * receives the signal, it must call odp_mb_acquire() before it reads the + * memory written by the releasing thread. + * + * This call is not needed when using ODP defined synchronization mechanisms. + * + * @see odp_mb_acquire() */ -static inline void odp_sync_stores(void) -{ -#if defined __x86_64__ || defined __i386__ - - __asm__ __volatile__ ("sfence\n" : : : "memory"); - -#elif defined(__arm__) -#if __ARM_ARCH == 6 - __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \ - : : "r" (0) : "memory"); -#elif __ARM_ARCH >= 7 || defined __aarch64__ - - __asm__ __volatile__ ("dmb st" : : : "memory"); -#else - __asm__ __volatile__ ("" : : : "memory"); -#endif - -#elif defined __OCTEON__ - - __asm__ __volatile__ ("syncws\n" : : : "memory"); +void odp_mb_release(void); -#else - __sync_synchronize(); -#endif -} +/** + * Memory barrier for acquire operations + * + * This memory barrier has acquire semantics. It synchronizes with a pairing + * barrier for release operations. 
The releasing and acquiring threads + * synchronize through shared memory. The releasing thread must call + * odp_mb_release() before signaling the acquiring thread. After the acquiring + * thread receives the signal, it must call this barrier before it reads the + * memory written by the releasing thread. + * + * This call is not needed when using ODP defined synchronization mechanisms. + * + * @see odp_mb_release() + */ +void odp_mb_acquire(void); +/** + * Full memory barrier + * + * This is a full memory barrier. It guarantees that all load and store + * operations specified before it are visible to other threads before + * all load and store operations specified after it. + * + * This call is not needed when using ODP defined synchronization mechanisms. + */ +void odp_mb_full(void); /** * @} diff --git a/platform/linux-generic/include/odp/sync.h b/platform/linux-generic/include/odp/sync.h index bc73083..bfe67ee 100644 --- a/platform/linux-generic/include/odp/sync.h +++ b/platform/linux-generic/include/odp/sync.h @@ -17,6 +17,34 @@ extern "C" { #endif +/** @ingroup odp_barrier + * @{ + */ + +static inline void odp_mb_release(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + +static inline void odp_mb_acquire(void) +{ + __atomic_thread_fence(__ATOMIC_ACQUIRE); +} + +static inline void odp_mb_full(void) +{ + __atomic_thread_fence(__ATOMIC_SEQ_CST); +} + +static inline void odp_sync_stores(void) +{ + __atomic_thread_fence(__ATOMIC_RELEASE); +} + +/** + * @} + */ + #include <odp/api/sync.h> #ifdef __cplusplus -- 2.6.3 _______________________________________________ lng-odp mailing list lng-odp@lists.linaro.org<mailto:lng-odp@lists.linaro.org> https://lists.linaro.org/mailman/listinfo/lng-odp
_______________________________________________ lng-odp mailing list lng-odp@lists.linaro.org https://lists.linaro.org/mailman/listinfo/lng-odp