Updated the odp_sync_stores() specification and added odp_sync_loads()
to pair with it. Both are implemented with the GCC
__atomic_thread_fence() builtin.

Signed-off-by: Petri Savolainen <petri.savolai...@nokia.com>
---
 include/odp/api/sync.h                    | 53 +++++++++++++++----------------
 platform/linux-generic/include/odp/sync.h | 18 +++++++++++
 2 files changed, 43 insertions(+), 28 deletions(-)

diff --git a/include/odp/api/sync.h b/include/odp/api/sync.h
index 6477e74..8456f68 100644
--- a/include/odp/api/sync.h
+++ b/include/odp/api/sync.h
@@ -23,37 +23,34 @@ extern "C" {
  */
 
 /**
- * Synchronise stores
+ * Synchronize stores
  *
- * Ensures that all CPU store operations that precede the odp_sync_stores()
- * call are globally visible before any store operation that follows it.
+ * This call implements a write memory barrier between threads. It ensures that
+ * all (non-atomic or relaxed atomic) stores from the calling thread that
+ * precede this call are globally visible before any store operation that
+ * follows it. It prevents stores from moving from before the call to after it.
+ *
+ * ODP synchronization mechanisms (e.g. barrier, unlocks, queue enqueues)
+ * include a write barrier, so this call is not needed when using those.
+ *
+ * @see odp_sync_loads()
  */
-static inline void odp_sync_stores(void)
-{
-#if defined __x86_64__ || defined __i386__
-
-       __asm__  __volatile__ ("sfence\n" : : : "memory");
-
-#elif defined(__arm__)
-#if __ARM_ARCH == 6
-       __asm__ __volatile__ ("mcr p15, 0, %0, c7, c10, 5" \
-                       : : "r" (0) : "memory");
-#elif __ARM_ARCH >= 7 || defined __aarch64__
-
-       __asm__ __volatile__ ("dmb st" : : : "memory");
-#else
-       __asm__ __volatile__ ("" : : : "memory");
-#endif
-
-#elif defined __OCTEON__
-
-       __asm__  __volatile__ ("syncws\n" : : : "memory");
-
-#else
-       __sync_synchronize();
-#endif
-}
+void odp_sync_stores(void);
 
+/**
+ * Synchronize loads
+ *
+ * This call implements a read memory barrier. It ensures that all (non-atomic
+ * or relaxed atomic) loads that precede this call happen before any load
+ * operation that follows it. It prevents loads from moving from after the call
+ * to before it.
+ *
+ * ODP synchronization mechanisms (e.g. barrier, locks, queue dequeues)
+ * include a read barrier, so this call is not needed when using those.
+ *
+ * @see odp_sync_stores()
+ */
+void odp_sync_loads(void);
 
 /**
  * @}
diff --git a/platform/linux-generic/include/odp/sync.h b/platform/linux-generic/include/odp/sync.h
index bc73083..09b3939 100644
--- a/platform/linux-generic/include/odp/sync.h
+++ b/platform/linux-generic/include/odp/sync.h
@@ -17,6 +17,24 @@
 extern "C" {
 #endif
 
+/** @ingroup odp_barrier
+ *  @{
+ */
+
+static inline void odp_sync_stores(void)
+{
+       __atomic_thread_fence(__ATOMIC_RELEASE);
+}
+
+static inline void odp_sync_loads(void)
+{
+       __atomic_thread_fence(__ATOMIC_ACQUIRE);
+}
+
+/**
+ * @}
+ */
+
 #include <odp/api/sync.h>
 
 #ifdef __cplusplus
-- 
2.6.2

_______________________________________________
lng-odp mailing list
lng-odp@lists.linaro.org
https://lists.linaro.org/mailman/listinfo/lng-odp

Reply via email to