The rte_atomic32_cmpset is deprecated. Initial attempts at
changing this with direct conversion to
rte_atomic_compare_exchange_weak_explicit()
regressed MP/MC contended performance on x86 by 10-30%,
because the C11 builtin's failure-writeback semantic forces
GCC to emit extra instructions on the CAS critical path.

Add an internal __rte_ring_compare_and_swap() wrapper that calls
__sync_bool_compare_and_swap() directly, which keeps the original
instruction sequence. Add equivalent function for MSVC.

Signed-off-by: Stephen Hemminger <[email protected]>
---
 lib/ring/rte_ring_generic_pvt.h | 32 ++++++++++++++++++++++++++++----
 1 file changed, 28 insertions(+), 4 deletions(-)

diff --git a/lib/ring/rte_ring_generic_pvt.h b/lib/ring/rte_ring_generic_pvt.h
index affd2d5ba7..0fb972de9e 100644
--- a/lib/ring/rte_ring_generic_pvt.h
+++ b/lib/ring/rte_ring_generic_pvt.h
@@ -18,6 +18,30 @@
  * For more information please refer to <rte_ring.h>.
  */
 
+/**
+ * @internal optimized version of compare exchange
+ *
+ * The C11 builtin's failure-writeback semantic generates worse code on x86.
+ * Unlike rte_atomic_compare_exchange_*_explicit(), this wrapper does NOT
+ * write the actual value back to a pointer on failure. Callers in a retry
+ * loop must reload the expected value explicitly on the next iteration.
+ *
+ * Full memory barrier, equivalent to rte_memory_order_seq_cst on both
+ * success and failure.
+ */
+static __rte_always_inline bool
+__rte_ring_compare_and_swap(volatile uint32_t *dst,
+                           uint32_t expected, uint32_t desired)
+{
+#if defined(RTE_TOOLCHAIN_MSVC)
+       return _InterlockedCompareExchange((volatile long *)dst,
+                                          (long)desired, (long)expected)
+               == (long)expected;
+#else
+       return __sync_bool_compare_and_swap(dst, expected, desired);
+#endif
+}
+
 /**
  * @internal This function updates tail values.
  */
@@ -108,10 +132,10 @@ __rte_ring_headtail_move_head(struct rte_ring_headtail *d,
                if (is_st) {
                        d->head = *new_head;
                        success = 1;
-               } else
-                       success = rte_atomic32_cmpset(
-                                       (uint32_t *)(uintptr_t)&d->head,
-                                       *old_head, *new_head);
+               } else {
+                       success = __rte_ring_compare_and_swap(
+                                       &d->head, *old_head, *new_head);
+               }
        } while (unlikely(success == 0));
        return n;
 }
-- 
2.53.0

Reply via email to