Add fast paths for FEAT_LSE2, using the detection in tcg.

Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
---
 accel/tcg/ldst_atomicity.c.inc | 37 ++++++++++++++++++++++++++++++----
 1 file changed, 33 insertions(+), 4 deletions(-)
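For reference, "the detection in tcg" above is the have_lse2 flag set
during host probing.  A minimal sketch of how such a probe can look on
aarch64 Linux, assuming getauxval() and the HWCAP_USCAT bit (which
advertises FEAT_LSE2) are available; this is illustrative only, not the
actual tcg code:

    #include <stdbool.h>
    #include <sys/auxv.h>    /* getauxval(), AT_HWCAP */
    #include <asm/hwcap.h>   /* HWCAP_USCAT */

    static bool have_lse2;

    static void probe_lse2(void)
    {
        /*
         * HWCAP_USCAT advertises FEAT_LSE2: loads/stores that stay
         * within a 16-byte aligned granule are single-copy atomic.
         */
    #ifdef HWCAP_USCAT
        have_lse2 = (getauxval(AT_HWCAP) & HWCAP_USCAT) != 0;
    #else
        have_lse2 = false;
    #endif
    }
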
diff --git a/accel/tcg/ldst_atomicity.c.inc b/accel/tcg/ldst_atomicity.c.inc
index 9a95ac327d..277629f241 100644
--- a/accel/tcg/ldst_atomicity.c.inc
+++ b/accel/tcg/ldst_atomicity.c.inc
@@ -41,6 +41,8 @@
  * but we're using tcg/tci/ instead.
  */
 # define HAVE_al16_fast    false
+#elif defined(__aarch64__)
+# define HAVE_al16_fast    likely(have_lse2)
 #elif defined(__x86_64__)
 # define HAVE_al16_fast    likely(have_atomic16)
 #else
@@ -48,6 +50,8 @@
 #endif
 #if defined(CONFIG_ATOMIC128) || defined(CONFIG_CMPXCHG128)
 # define HAVE_al16         true
+#elif defined(__aarch64__)
+# define HAVE_al16         true
 #else
 # define HAVE_al16         false
 #endif
@@ -170,6 +174,14 @@ load_atomic16(void *pv)
 
     r.u = qatomic_read__nocheck(p);
     return r.s;
+#elif defined(__aarch64__)
+    uint64_t l, h;
+
+    /* Via HAVE_al16_fast, FEAT_LSE2 is present: LDP becomes atomic. */
+    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*(__uint128_t *)pv));
+
+    qemu_build_assert(!HOST_BIG_ENDIAN);
+    return int128_make128(l, h);
 #elif defined(__x86_64__)
     Int128Alias r;
 
@@ -409,6 +421,18 @@ load_atom_extract_al16_or_al8(void *pv, int s)
         r = qatomic_read__nocheck(p16);
     }
     return r >> shr;
+#elif defined(__aarch64__)
+    /*
+     * Via HAVE_al16_fast, FEAT_LSE2 is present.
+     * LDP becomes single-copy atomic if 16-byte aligned, and
+     * single-copy atomic on the parts if 8-byte aligned.
+     */
+    uintptr_t pi = (uintptr_t)pv;
+    int shr = (pi & 7) * 8;
+    uint64_t l, h;
+
+    asm("ldp %0, %1, %2" : "=r"(l), "=r"(h) : "m"(*(__uint128_t *)(pi & ~7)));
+    return (l >> shr) | (h << (-shr & 63));
 #elif defined(__x86_64__)
     uintptr_t pi = (uintptr_t)pv;
     int shr = (pi & 7) * 8;
@@ -764,10 +788,15 @@ store_atomic16(void *pv, Int128Alias val)
 
         l = int128_getlo(val.s);
         h = int128_gethi(val.s);
-        asm("0: ldxp %0, xzr, %1\n\t"
-            "stxp %w0, %2, %3, %1\n\t"
-            "cbnz %w0, 0b"
-            : "=&r"(t), "=Q"(*(__uint128_t *)pv) : "r"(l), "r"(h));
+        if (HAVE_al16_fast) {
+            /* Via HAVE_al16_fast, FEAT_LSE2 is present: STP becomes atomic. */
+            asm("stp %1, %2, %0" : "=Q"(*(__uint128_t *)pv) : "r"(l), "r"(h));
+        } else {
+            asm("0: ldxp %0, xzr, %1\n\t"
+                "stxp %w0, %2, %3, %1\n\t"
+                "cbnz %w0, 0b"
+                : "=&r"(t), "=Q"(*(__uint128_t *)pv) : "r"(l), "r"(h));
+        }
         return;
     }
 #elif defined(CONFIG_CMPXCHG128)
-- 
2.34.1