Force the use of cmpxchg16b on x86_64. Wikipedia suggests that only very old AMD64 (circa 2004) did not have this instruction. Further, it's required by Windows 8 so no new cpus will ever omit it.
If we truely care about these, then we could check this at startup time and then avoid executing paths that use it. Reviewed-by: Emilio G. Cota <c...@braap.org> Reviewed-by: Alex Bennée <alex.ben...@linaro.org> Signed-off-by: Richard Henderson <r...@twiddle.net> --- atomic_template.h | 40 +++++++++++++++++++++++++++++++++++++++- configure | 29 ++++++++++++++++++++++++++++- cputlb.c | 5 +++++ include/qemu/int128.h | 6 ++++++ tcg-runtime.c | 18 ++++++++++++++++++ tcg/tcg.h | 24 +++++++++++++++++++++++- 6 files changed, 119 insertions(+), 3 deletions(-) diff --git a/atomic_template.h b/atomic_template.h index cf83725..b400b2a 100644 --- a/atomic_template.h +++ b/atomic_template.h @@ -18,7 +18,11 @@ * License along with this library; if not, see <http://www.gnu.org/licenses/>. */ -#if DATA_SIZE == 8 +#if DATA_SIZE == 16 +# define SUFFIX o +# define DATA_TYPE Int128 +# define BSWAP bswap128 +#elif DATA_SIZE == 8 # define SUFFIX q # define DATA_TYPE uint64_t # define BSWAP bswap64 @@ -61,6 +65,21 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, return atomic_cmpxchg__nocheck(haddr, cmpv, newv); } +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return val; +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { @@ -86,6 +105,8 @@ GEN_ATOMIC_HELPER(or_fetch) GEN_ATOMIC_HELPER(xor_fetch) #undef GEN_ATOMIC_HELPER +#endif /* DATA SIZE >= 16 */ + #undef END #if DATA_SIZE > 1 @@ -105,6 +126,22 @@ ABI_TYPE ATOMIC_NAME(cmpxchg)(CPUArchState *env, target_ulong addr, return BSWAP(atomic_cmpxchg__nocheck(haddr, BSWAP(cmpv), BSWAP(newv))); } +#if DATA_SIZE >= 16 +ABI_TYPE ATOMIC_NAME(ld)(CPUArchState *env, target_ulong addr EXTRA_ARGS) +{ + DATA_TYPE val, *haddr = ATOMIC_MMU_LOOKUP; + __atomic_load(haddr, &val, __ATOMIC_RELAXED); + return BSWAP(val); +} + +void ATOMIC_NAME(st)(CPUArchState *env, target_ulong addr, + ABI_TYPE val EXTRA_ARGS) +{ + DATA_TYPE *haddr = ATOMIC_MMU_LOOKUP; + val = BSWAP(val); + __atomic_store(haddr, &val, __ATOMIC_RELAXED); +} +#else ABI_TYPE ATOMIC_NAME(xchg)(CPUArchState *env, target_ulong addr, ABI_TYPE val EXTRA_ARGS) { @@ -166,6 +203,7 @@ ABI_TYPE ATOMIC_NAME(add_fetch)(CPUArchState *env, target_ulong addr, ldo = ldn; } } +#endif /* DATA_SIZE >= 16 */ #undef END #endif /* DATA_SIZE > 1 */ diff --git a/configure b/configure index dd9e679..a499705 100755 --- a/configure +++ b/configure @@ -1216,7 +1216,10 @@ case "$cpu" in cc_i386='$(CC) -m32' ;; x86_64) - CPU_CFLAGS="-m64" + # ??? Only extremely old AMD cpus do not have cmpxchg16b. + # If we truly care, we should simply detect this case at + # runtime and generate the fallback to serial emulation. + CPU_CFLAGS="-m64 -mcx16" LDFLAGS="-m64 $LDFLAGS" cc_i386='$(CC) -m32' ;; @@ -4491,6 +4494,26 @@ if compile_prog "" "" ; then int128=yes fi +######################################### +# See if 128-bit atomic operations are supported. + +atomic128=no +if test "$int128" = "yes"; then + cat > $TMPC << EOF +int main(void) +{ + unsigned __int128 x = 0, y = 0; + y = __atomic_load_16(&x, 0); + __atomic_store_16(&x, y, 0); + __atomic_compare_exchange_16(&x, &y, x, 0, 0, 0); + return 0; +} +EOF + if compile_prog "" "" ; then + atomic128=yes + fi +fi + ######################################## # check if getauxval is available. @@ -5447,6 +5470,10 @@ if test "$int128" = "yes" ; then echo "CONFIG_INT128=y" >> $config_host_mak fi +if test "$atomic128" = "yes" ; then + echo "CONFIG_ATOMIC128=y" >> $config_host_mak +fi + if test "$getauxval" = "yes" ; then echo "CONFIG_GETAUXVAL=y" >> $config_host_mak fi diff --git a/cputlb.c b/cputlb.c index 4f2c500..845b2a7 100644 --- a/cputlb.c +++ b/cputlb.c @@ -690,6 +690,11 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 8 #include "atomic_template.h" +#ifdef CONFIG_ATOMIC128 +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif + /* Second set of helpers are directly callable from TCG as helpers. */ #undef EXTRA_ARGS diff --git a/include/qemu/int128.h b/include/qemu/int128.h index d4c6e44..5c9890d 100644 --- a/include/qemu/int128.h +++ b/include/qemu/int128.h @@ -2,6 +2,7 @@ #define INT128_H #ifdef CONFIG_INT128 +#include "qemu/bswap.h" typedef __int128_t Int128; @@ -137,6 +138,11 @@ static inline void int128_subfrom(Int128 *a, Int128 b) *a -= b; } +static inline Int128 bswap128(Int128 a) +{ + return int128_make128(bswap64(int128_gethi(a)), bswap64(int128_getlo(a))); +} + #else /* !CONFIG_INT128 */ typedef struct Int128 Int128; diff --git a/tcg-runtime.c b/tcg-runtime.c index aa55d12..e952153 100644 --- a/tcg-runtime.c +++ b/tcg-runtime.c @@ -133,4 +133,22 @@ static void *atomic_mmu_lookup(CPUArchState *env, target_ulong addr, #define DATA_SIZE 8 #include "atomic_template.h" +/* The following is only callable from other helpers, and matches up + with the softmmu version. */ + +#ifdef CONFIG_ATOMIC128 + +#undef EXTRA_ARGS +#undef ATOMIC_NAME +#undef ATOMIC_MMU_LOOKUP + +#define EXTRA_ARGS , TCGMemOpIdx oi, uintptr_t retaddr +#define ATOMIC_NAME(X) \ + HELPER(glue(glue(glue(atomic_ ## X, SUFFIX), END), _mmu)) +#define ATOMIC_MMU_LOOKUP atomic_mmu_lookup(env, addr, DATA_SIZE, retaddr) + +#define DATA_SIZE 16 +#include "atomic_template.h" +#endif /* CONFIG_ATOMIC128 */ + #endif /* !CONFIG_SOFTMMU */ diff --git a/tcg/tcg.h b/tcg/tcg.h index 5931965..bc3ea7a 100644 --- a/tcg/tcg.h +++ b/tcg/tcg.h @@ -1229,7 +1229,29 @@ GEN_ATOMIC_HELPER_ALL(xchg) #undef GEN_ATOMIC_HELPER_ALL #undef GEN_ATOMIC_HELPER - #endif /* CONFIG_SOFTMMU */ +#ifdef CONFIG_ATOMIC128 +#include "qemu/int128.h" + +/* These aren't really a "proper" helpers because TCG cannot manage Int128. + However, use the same format as the others, for use by the backends. */ +Int128 helper_atomic_cmpxchgo_le_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_cmpxchgo_be_mmu(CPUArchState *env, target_ulong addr, + Int128 cmpv, Int128 newv, + TCGMemOpIdx oi, uintptr_t retaddr); + +Int128 helper_atomic_ldo_le_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +Int128 helper_atomic_ldo_be_mmu(CPUArchState *env, target_ulong addr, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_le_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); +void helper_atomic_sto_be_mmu(CPUArchState *env, target_ulong addr, Int128 val, + TCGMemOpIdx oi, uintptr_t retaddr); + +#endif /* CONFIG_ATOMIC128 */ + #endif /* TCG_H */ -- 2.7.4