The Zawrs extension adds a pair of instructions that stall a core until a memory location is written to. This patch uses one of them to implement RISC-V-specific versions of the rte_wait_until_equal_* functions. This is potentially more energy efficient than the default implementation that uses rte_pause/Zihintpause.
The technique works as follows:

* Create a reservation set containing the address we want to wait on
  using an atomic load-reserved instruction (lr.w or lr.d)
* Call wrs.nto - this blocks until the reservation set is invalidated by
  someone else writing to that address
* Execution can also resume arbitrarily, so we still need to check
  whether a change occurred and loop if not

Due to RISC-V atomics only supporting naturally aligned word (32 bit) and
double word (64 bit) loads, I've used pointer rounding and bit shifting to
implement waiting on 16-bit values.

This new functionality is controlled by a Meson flag that is disabled by
default.

Signed-off-by: Daniel Gregory <daniel.greg...@bytedance.com>
Suggested-by: Punit Agrawal <punit.agra...@bytedance.com>
---

Posting as an RFC to get early feedback and enable testing by others with
Zawrs-enabled hardware. Whilst I have been able to test that it compiles &
passes tests using QEMU, I am waiting on some Zawrs-enabled hardware to
become available before I carry out performance tests.

Nonetheless, I would be glad to hear any feedback on the general approach.

Thanks, Daniel

 config/riscv/meson.build          |   5 ++
 lib/eal/riscv/include/rte_pause.h | 139 ++++++++++++++++++++++++++++++
 2 files changed, 144 insertions(+)

diff --git a/config/riscv/meson.build b/config/riscv/meson.build
index 07d7d9da23..4cfdc42ecb 100644
--- a/config/riscv/meson.build
+++ b/config/riscv/meson.build
@@ -26,6 +26,11 @@ flags_common = [
     # read from /proc/device-tree/cpus/timebase-frequency. This property is
     # guaranteed on Linux, as riscv time_init() requires it.
     ['RTE_RISCV_TIME_FREQ', 0],
+
+    # Enable use of RISC-V Wait-on-Reservation-Set extension (Zawrs)
+    # Mitigates looping when polling on memory locations
+    # Make sure to add '_zawrs' to your target's -march below
+    ['RTE_RISCV_ZAWRS', false]
 ]
 
 ## SoC-specific options.
diff --git a/lib/eal/riscv/include/rte_pause.h b/lib/eal/riscv/include/rte_pause.h
index cb8e9ca52d..e7b43dffa3 100644
--- a/lib/eal/riscv/include/rte_pause.h
+++ b/lib/eal/riscv/include/rte_pause.h
@@ -11,6 +11,12 @@
 extern "C" {
 #endif
 
+#ifdef RTE_RISCV_ZAWRS
+#define RTE_WAIT_UNTIL_EQUAL_ARCH_DEFINED
+#endif
+
+#include <rte_debug.h>
+
 #include "rte_atomic.h"
 
 #include "generic/rte_pause.h"
@@ -24,6 +30,139 @@ static inline void rte_pause(void)
 	asm volatile(".int 0x0100000F" : : : "memory");
 }
 
+#ifdef RTE_RISCV_ZAWRS
+
+/*
+ * Atomic load-reserved from an address. It returns either a sign-extended word
+ * or doubleword and creates a 'reservation set' containing the read memory
+ * location. When someone else writes to the reservation set, it is invalidated,
+ * causing any stalled WRS instructions to resume.
+ *
+ * Address needs to be naturally aligned.
+ */
+#define __RTE_RISCV_LR_32(src, dst, memorder) do { \
+	if ((memorder) == rte_memory_order_relaxed) { \
+		asm volatile("lr.w %0, (%1)" \
+				: "=r" (dst) \
+				: "r" (src) \
+				: "memory"); \
+	} else { \
+		asm volatile("lr.w.aq %0, (%1)" \
+				: "=r" (dst) \
+				: "r" (src) \
+				: "memory"); \
+	} } while (0)
+#define __RTE_RISCV_LR_64(src, dst, memorder) do { \
+	if ((memorder) == rte_memory_order_relaxed) { \
+		asm volatile("lr.d %0, (%1)" \
+				: "=r" (dst) \
+				: "r" (src) \
+				: "memory"); \
+	} else { \
+		asm volatile("lr.d.aq %0, (%1)" \
+				: "=r" (dst) \
+				: "r" (src) \
+				: "memory"); \
+	} } while (0)
+
+/*
+ * There's not a RISC-V atomic load primitive for halfwords, so cast up to a
+ * _naturally aligned_ word and extract the halfword we want
+ */
+#define __RTE_RISCV_LR_16(src, dst, memorder) do { \
+	uint32_t word; \
+	__RTE_RISCV_LR_32(((uintptr_t)(src) & (~3)), word, (memorder)); \
+	if ((size_t)(src) & 3) \
+		(dst) = (typeof(dst))(word >> 16); \
+	else \
+		(dst) = (typeof(dst))(word & 0xFFFF); \
+} while (0)
+
+#define __RTE_RISCV_LR(src, dst, memorder, size) do { \
+	RTE_BUILD_BUG_ON((size) != 16 && (size) != 32 && (size) != 64); \
+	if ((size) == 16) \
+		__RTE_RISCV_LR_16(src, dst, memorder); \
+	else if ((size) == 32) \
+		__RTE_RISCV_LR_32(src, dst, memorder); \
+	else if ((size) == 64) \
+		__RTE_RISCV_LR_64(src, dst, memorder); \
+} while (0)
+
+/*
+ * Wait-on-Reservation-Set extension instruction. It stalls execution until the
+ * reservation set is invalidated or an interrupt is observed.
+ * A loop is still needed around it as it may stop stalling arbitrarily.
+ */
+#define __RTE_RISCV_WRS_NTO() do { asm volatile("wrs.nto" : : : "memory"); } while (0)
+
+static __rte_always_inline void
+rte_wait_until_equal_16(volatile uint16_t *addr, uint16_t expected,
+		int memorder)
+{
+	uint16_t value;
+
+	RTE_ASSERT(memorder == rte_memory_order_acquire ||
+		memorder == rte_memory_order_relaxed);
+	RTE_ASSERT(((size_t)addr & 1) == 0);
+
+	__RTE_RISCV_LR_16(addr, value, memorder);
+	while (value != expected) {
+		__RTE_RISCV_WRS_NTO();
+		__RTE_RISCV_LR_16(addr, value, memorder);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_32(volatile uint32_t *addr, uint32_t expected,
+		int memorder)
+{
+	uint32_t value;
+
+	RTE_ASSERT(memorder == rte_memory_order_acquire ||
+		memorder == rte_memory_order_relaxed);
+	RTE_ASSERT(((size_t)addr & 3) == 0);
+
+	__RTE_RISCV_LR_32(addr, value, memorder);
+	while (value != expected) {
+		__RTE_RISCV_WRS_NTO();
+		__RTE_RISCV_LR_32(addr, value, memorder);
+	}
+}
+
+static __rte_always_inline void
+rte_wait_until_equal_64(volatile uint64_t *addr, uint64_t expected,
+		int memorder)
+{
+	uint64_t value;
+
+	RTE_ASSERT(memorder == rte_memory_order_acquire ||
+		memorder == rte_memory_order_relaxed);
+	RTE_ASSERT(((size_t)addr & 7) == 0);
+
+	__RTE_RISCV_LR_64(addr, value, memorder);
+	while (value != expected) {
+		__RTE_RISCV_WRS_NTO();
+		__RTE_RISCV_LR_64(addr, value, memorder);
+	}
+}
+
+#define RTE_WAIT_UNTIL_MASKED(addr, mask, cond, expected, memorder) do { \
+	RTE_BUILD_BUG_ON(!__builtin_constant_p(memorder)); \
+	RTE_BUILD_BUG_ON((memorder) != rte_memory_order_acquire && \
+		(memorder) != rte_memory_order_relaxed); \
+	RTE_ASSERT(((size_t)(addr) & (sizeof(*(addr)) - 1)) == 0); \
+	const uint32_t size = sizeof(*(addr)) << 3; \
+	typeof(*(addr)) expected_value = (expected); \
+	typeof(*(addr)) value; \
+	__RTE_RISCV_LR((addr), value, memorder, size); \
+	while (!((value & (mask)) cond expected_value)) { \
+		__RTE_RISCV_WRS_NTO(); \
+		__RTE_RISCV_LR((addr), value, memorder, size); \
+	} \
+} while (0)
+
+#endif /* RTE_RISCV_ZAWRS */
+
 #ifdef __cplusplus
 }
 #endif
-- 
2.39.2