On Thu, Feb 18, 2016 at 6:02 PM, Alex Bennée <alex.ben...@linaro.org> wrote:
>
> Alvise Rigo <a.r...@virtualopensystems.com> writes:
>
>> Use the new LL/SC runtime helpers to handle the ARM atomic instructions
>> in softmmu_llsc_template.h.
>>
>> In general, the helper generator
>> gen_{ldrex,strex}_{8,16a,32a,64a}() calls the function
>> helper_{le,be}_{ldlink,stcond}{ub,uw,ul,q}_mmu() implemented in
>> softmmu_llsc_template.h, doing an alignment check.
>>
>> In addition, add a simple helper function to emulate the CLREX instruction.
>>
>> Suggested-by: Jani Kokkonen <jani.kokko...@huawei.com>
>> Suggested-by: Claudio Fontana <claudio.font...@huawei.com>
>> Signed-off-by: Alvise Rigo <a.r...@virtualopensystems.com>
>> ---
>>  target-arm/cpu.h       |   2 +
>>  target-arm/helper.h    |   4 ++
>>  target-arm/machine.c   |   2 +
>>  target-arm/op_helper.c |  10 +++
>>  target-arm/translate.c | 188 +++++++++++++++++++++++++++++++++++++++++++++++--
>>  5 files changed, 202 insertions(+), 4 deletions(-)
>>
>> diff --git a/target-arm/cpu.h b/target-arm/cpu.h
>> index b8b3364..bb5361f 100644
>> --- a/target-arm/cpu.h
>> +++ b/target-arm/cpu.h
>> @@ -462,9 +462,11 @@ typedef struct CPUARMState {
>>          float_status fp_status;
>>          float_status standard_fp_status;
>>      } vfp;
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      uint64_t exclusive_addr;
>>      uint64_t exclusive_val;
>>      uint64_t exclusive_high;
>> +#endif
>>  #if defined(CONFIG_USER_ONLY)
>>      uint64_t exclusive_test;
>>      uint32_t exclusive_info;
>> diff --git a/target-arm/helper.h b/target-arm/helper.h
>> index c2a85c7..6bc3c0a 100644
>> --- a/target-arm/helper.h
>> +++ b/target-arm/helper.h
>> @@ -532,6 +532,10 @@ DEF_HELPER_2(dc_zva, void, env, i64)
>>  DEF_HELPER_FLAGS_2(neon_pmull_64_lo, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>  DEF_HELPER_FLAGS_2(neon_pmull_64_hi, TCG_CALL_NO_RWG_SE, i64, i64, i64)
>>
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +DEF_HELPER_1(atomic_clear, void, env)
>> +#endif
>> +
>>  #ifdef TARGET_AARCH64
>>  #include "helper-a64.h"
>>  #endif
>> diff --git a/target-arm/machine.c b/target-arm/machine.c
>> index ed1925a..7adfb4d 100644
>> --- a/target-arm/machine.c
>> +++ b/target-arm/machine.c
>> @@ -309,9 +309,11 @@ const VMStateDescription vmstate_arm_cpu = {
>>          VMSTATE_VARRAY_INT32(cpreg_vmstate_values, ARMCPU,
>>                               cpreg_vmstate_array_len,
>>                               0, vmstate_info_uint64, uint64_t),
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>          VMSTATE_UINT64(env.exclusive_addr, ARMCPU),
>>          VMSTATE_UINT64(env.exclusive_val, ARMCPU),
>>          VMSTATE_UINT64(env.exclusive_high, ARMCPU),
>> +#endif
>
> Hmm this does imply we either need to support migration of the LL/SC
> state in the generic code or map the generic state into the ARM-specific
> machine state or we'll break migration.
>
> The latter is probably better so you can save machine state from a
> pre-LL/SC build and migrate to a new LL/SC enabled build.

This would basically require adding some code to cpu_pre_save to copy
env.exclusive_* to the new structures. As a consequence, we would not get
rid of the pre-LL/SC variables.
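Something along these lines in target-arm/machine.c could do it (a rough,
untested sketch just to give the idea; excl_protected_range, ll_sc_context
and EXCLUSIVE_RESET_ADDR are what this series adds to the generic CPU state,
and the exact save/load direction would still need to be checked):

/* Sketch only: keep the legacy env.exclusive_* fields unconditionally and
 * sync them with the new generic LL/SC state around migration, so the wire
 * format stays compatible with pre-LL/SC builds. */
static void cpu_pre_save(void *opaque)
{
    ARMCPU *cpu = opaque;
    CPUState *cs = CPU(cpu);

    /* Expose the generic exclusive range through the legacy field so a
     * pre-LL/SC destination still receives a meaningful exclusive_addr. */
    cpu->env.exclusive_addr = cs->excl_protected_range.begin;
}

static int cpu_post_load(void *opaque, int version_id)
{
    ARMCPU *cpu = opaque;
    CPUState *cs = CPU(cpu);

    /* Rebuild the generic state from the migrated legacy field; whatever
     * cannot be reconstructed is simply reset. */
    cs->excl_protected_range.begin = cpu->env.exclusive_addr;
    cs->ll_sc_context = false;

    return 0;
}

(target-arm/machine.c already has cpu_pre_save/cpu_post_load, so in practice
this would be extra code in the existing hooks rather than new functions.)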
>
>>          VMSTATE_UINT64(env.features, ARMCPU),
>>          VMSTATE_UINT32(env.exception.syndrome, ARMCPU),
>>          VMSTATE_UINT32(env.exception.fsr, ARMCPU),
>> diff --git a/target-arm/op_helper.c b/target-arm/op_helper.c
>> index a5ee65f..404c13b 100644
>> --- a/target-arm/op_helper.c
>> +++ b/target-arm/op_helper.c
>> @@ -51,6 +51,14 @@ static int exception_target_el(CPUARMState *env)
>>      return target_el;
>>  }
>>
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +void HELPER(atomic_clear)(CPUARMState *env)
>> +{
>> +    ENV_GET_CPU(env)->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
>> +    ENV_GET_CPU(env)->ll_sc_context = false;
>> +}
>> +#endif
>> +
>
> Given this is just touching generic CPU state this helper should probably be
> part of the generic TCG runtime. I assume other arches will just call
> this helper as well.

Would it make sense instead to add a new CPUClass hook for this? Other
architectures might want a different behaviour (or add something else). A
rough sketch of what I mean is at the end of this mail.

Thank you,
alvise

>
>
>>  uint32_t HELPER(neon_tbl)(CPUARMState *env, uint32_t ireg, uint32_t def,
>>                            uint32_t rn, uint32_t maxindex)
>>  {
>> @@ -689,7 +697,9 @@ void HELPER(exception_return)(CPUARMState *env)
>>
>>      aarch64_save_sp(env, cur_el);
>>
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      env->exclusive_addr = -1;
>> +#endif
>>
>>      /* We must squash the PSTATE.SS bit to zero unless both of the
>>       * following hold:
>> diff --git a/target-arm/translate.c b/target-arm/translate.c
>> index cff511b..5150841 100644
>> --- a/target-arm/translate.c
>> +++ b/target-arm/translate.c
>> @@ -60,8 +60,10 @@ TCGv_ptr cpu_env;
>>  static TCGv_i64 cpu_V0, cpu_V1, cpu_M0;
>>  static TCGv_i32 cpu_R[16];
>>  TCGv_i32 cpu_CF, cpu_NF, cpu_VF, cpu_ZF;
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>  TCGv_i64 cpu_exclusive_addr;
>>  TCGv_i64 cpu_exclusive_val;
>> +#endif
>>  #ifdef CONFIG_USER_ONLY
>>  TCGv_i64 cpu_exclusive_test;
>>  TCGv_i32 cpu_exclusive_info;
>> @@ -94,10 +96,12 @@ void arm_translate_init(void)
>>      cpu_VF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, VF), "VF");
>>      cpu_ZF = tcg_global_mem_new_i32(TCG_AREG0, offsetof(CPUARMState, ZF), "ZF");
>>
>> +#if !defined(CONFIG_ARM_USE_LDST_EXCL)
>>      cpu_exclusive_addr = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_addr), "exclusive_addr");
>>      cpu_exclusive_val = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_val), "exclusive_val");
>> +#endif
>>  #ifdef CONFIG_USER_ONLY
>>      cpu_exclusive_test = tcg_global_mem_new_i64(TCG_AREG0,
>>          offsetof(CPUARMState, exclusive_test), "exclusive_test");
>> @@ -7413,15 +7417,145 @@ static void gen_logicq_cc(TCGv_i32 lo, TCGv_i32 hi)
>>      tcg_gen_or_i32(cpu_ZF, lo, hi);
>>  }
>>
>> -/* Load/Store exclusive instructions are implemented by remembering
>> +/* If the softmmu is enabled, the translation of Load/Store exclusive
>> +   instructions will rely on the gen_helper_{ldlink,stcond} helpers,
>> +   offloading most of the work to the softmmu_llsc_template.h functions.
>> +   All the accesses made by the exclusive instructions include an
>> +   alignment check.
>> +
>> +   Otherwise, these instructions are implemented by remembering
>>     the value/address loaded, and seeing if these are the same
>>     when the store is performed. This should be sufficient to implement
>>     the architecturally mandated semantics, and avoids having to monitor
>>     regular stores.
>>
>> -   In system emulation mode only one CPU will be running at once, so
>> -   this sequence is effectively atomic. In user emulation mode we
>> -   throw an exception and handle the atomic operation elsewhere. */
>> +   In user emulation mode we throw an exception and handle the atomic
>> +   operation elsewhere. */
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +
>> +#if TARGET_LONG_BITS == 32
>> +#define DO_GEN_LDREX(SUFF)                                              \
>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 index)                     \
>> +{                                                                       \
>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr, index);                \
>> +}
>> +
>> +#define DO_GEN_STREX(SUFF)                                              \
>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 val, TCGv_i32 index)       \
>> +{                                                                       \
>> +    gen_helper_stcond_##SUFF(dst, cpu_env, addr, val, index);           \
>> +}
>> +
>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>> +{
>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr, index);
>> +}
>> +
>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>> +                                  TCGv_i32 index)
>> +{
>> +
>> +    gen_helper_stcond_i64a(dst, cpu_env, addr, val, index);
>> +}
>> +#else
>> +#define DO_GEN_LDREX(SUFF)                                              \
>> +static inline void gen_ldrex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 index)                     \
>> +{                                                                       \
>> +    TCGv addr64 = tcg_temp_new();                                       \
>> +    tcg_gen_extu_i32_i64(addr64, addr);                                 \
>> +    gen_helper_ldlink_##SUFF(dst, cpu_env, addr64, index);              \
>> +    tcg_temp_free(addr64);                                              \
>> +}
>> +
>> +#define DO_GEN_STREX(SUFF)                                              \
>> +static inline void gen_strex_##SUFF(TCGv_i32 dst, TCGv_i32 addr,       \
>> +                                    TCGv_i32 val, TCGv_i32 index)       \
>> +{                                                                       \
>> +    TCGv addr64 = tcg_temp_new();                                       \
>> +    TCGv dst64 = tcg_temp_new();                                        \
>> +    tcg_gen_extu_i32_i64(addr64, addr);                                 \
>> +    gen_helper_stcond_##SUFF(dst64, cpu_env, addr64, val, index);       \
>> +    tcg_gen_extrl_i64_i32(dst, dst64);                                  \
>> +    tcg_temp_free(dst64);                                               \
>> +    tcg_temp_free(addr64);                                              \
>> +}
>> +
>> +static inline void gen_ldrex_i64a(TCGv_i64 dst, TCGv_i32 addr, TCGv_i32 index)
>> +{
>> +    TCGv addr64 = tcg_temp_new();
>> +    tcg_gen_extu_i32_i64(addr64, addr);
>> +    gen_helper_ldlink_i64a(dst, cpu_env, addr64, index);
>> +    tcg_temp_free(addr64);
>> +}
>> +
>> +static inline void gen_strex_i64a(TCGv_i32 dst, TCGv_i32 addr, TCGv_i64 val,
>> +                                  TCGv_i32 index)
>> +{
>> +    TCGv addr64 = tcg_temp_new();
>> +    TCGv dst64 = tcg_temp_new();
>> +
>> +    tcg_gen_extu_i32_i64(addr64, addr);
>> +    gen_helper_stcond_i64a(dst64, cpu_env, addr64, val, index);
>> +    tcg_gen_extrl_i64_i32(dst, dst64);
>> +
>> +    tcg_temp_free(dst64);
>> +    tcg_temp_free(addr64);
>> +}
>> +#endif
>> +
>> +#if defined(CONFIG_ARM_USE_LDST_EXCL)
>> +DO_GEN_LDREX(i8)
>> +DO_GEN_LDREX(i16a)
>> +DO_GEN_LDREX(i32a)
>> +
>> +DO_GEN_STREX(i8)
>> +DO_GEN_STREX(i16a)
>> +DO_GEN_STREX(i32a)
>> +#endif
>> +
>> +static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>> +                               TCGv_i32 addr, int size)
>> + {
>> +    TCGv_i32 tmp = tcg_temp_new_i32();
>> +    TCGv_i32 mem_idx = tcg_temp_new_i32();
>> +
>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>> +
>> +    if (size != 3) {
>> +        switch (size) {
>> +        case 0:
>> +            gen_ldrex_i8(tmp, addr, mem_idx);
>> +            break;
>> +        case 1:
>> +            gen_ldrex_i16a(tmp, addr, mem_idx);
>> +            break;
>> +        case 2:
>> +            gen_ldrex_i32a(tmp, addr, mem_idx);
>> +            break;
>> +        default:
>> +            abort();
>> +        }
>> +
>> +        store_reg(s, rt, tmp);
>> +    } else {
>> +        TCGv_i64 tmp64 = tcg_temp_new_i64();
>> +        TCGv_i32 tmph = tcg_temp_new_i32();
>> +
>> +        gen_ldrex_i64a(tmp64, addr, mem_idx);
>> +        tcg_gen_extr_i64_i32(tmp, tmph, tmp64);
>> +
>> +        store_reg(s, rt, tmp);
>> +        store_reg(s, rt2, tmph);
>> +
>> +        tcg_temp_free_i64(tmp64);
>> +    }
>> +
>> +    tcg_temp_free_i32(mem_idx);
>> +}
>> +#else
>>  static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>                                 TCGv_i32 addr, int size)
>>  {
>> @@ -7460,10 +7594,15 @@ static void gen_load_exclusive(DisasContext *s, int rt, int rt2,
>>      store_reg(s, rt, tmp);
>>      tcg_gen_extu_i32_i64(cpu_exclusive_addr, addr);
>>  }
>> +#endif
>>
>>  static void gen_clrex(DisasContext *s)
>>  {
>> +#ifdef CONFIG_ARM_USE_LDST_EXCL
>> +    gen_helper_atomic_clear(cpu_env);
>> +#else
>>      tcg_gen_movi_i64(cpu_exclusive_addr, -1);
>> +#endif
>>  }
>>
>>  #ifdef CONFIG_USER_ONLY
>> @@ -7475,6 +7614,47 @@ static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>                          size | (rd << 4) | (rt << 8) | (rt2 << 12));
>>      gen_exception_internal_insn(s, 4, EXCP_STREX);
>>  }
>> +#elif defined CONFIG_ARM_USE_LDST_EXCL
>> +static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>> +                                TCGv_i32 addr, int size)
>> +{
>> +    TCGv_i32 tmp, mem_idx;
>> +
>> +    mem_idx = tcg_temp_new_i32();
>> +
>> +    tcg_gen_movi_i32(mem_idx, get_mem_index(s));
>> +    tmp = load_reg(s, rt);
>> +
>> +    if (size != 3) {
>> +        switch (size) {
>> +        case 0:
>> +            gen_strex_i8(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        case 1:
>> +            gen_strex_i16a(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        case 2:
>> +            gen_strex_i32a(cpu_R[rd], addr, tmp, mem_idx);
>> +            break;
>> +        default:
>> +            abort();
>> +        }
>> +    } else {
>> +        TCGv_i64 tmp64;
>> +        TCGv_i32 tmp2;
>> +
>> +        tmp64 = tcg_temp_new_i64();
>> +        tmp2 = load_reg(s, rt2);
>> +        tcg_gen_concat_i32_i64(tmp64, tmp, tmp2);
>> +        gen_strex_i64a(cpu_R[rd], addr, tmp64, mem_idx);
>> +
>> +        tcg_temp_free_i32(tmp2);
>> +        tcg_temp_free_i64(tmp64);
>> +    }
>> +
>> +    tcg_temp_free_i32(tmp);
>> +    tcg_temp_free_i32(mem_idx);
>> +}
>>  #else
>>  static void gen_store_exclusive(DisasContext *s, int rd, int rt, int rt2,
>>                                  TCGv_i32 addr, int size)
>
>
> --
> Alex Bennée
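For completeness, this is roughly the kind of CPUClass hook I was thinking
of above. It is only a sketch with made-up names (cpu_reset_excl is not an
existing hook), just to show how clearing the exclusive state could stay
target-agnostic while still being overridable:

/* include/qom/cpu.h (sketch): new CPUClass member so that each target can
 * decide how its exclusive/LL-SC state gets cleared:
 *
 *     void (*cpu_reset_excl)(CPUState *cpu);
 */

/* Generic default, usable by any target converted to the new LL/SC code;
 * it does exactly what HELPER(atomic_clear) does in this patch. */
static void cpu_reset_excl_default(CPUState *cpu)
{
    cpu->excl_protected_range.begin = EXCLUSIVE_RESET_ADDR;
    cpu->ll_sc_context = false;
}

/* target-arm/op_helper.c: the CLREX helper would then just call the hook. */
void HELPER(atomic_clear)(CPUARMState *env)
{
    CPUState *cs = ENV_GET_CPU(env);

    CPU_GET_CLASS(cs)->cpu_reset_excl(cs);
}

A target that needs to do more than resetting the protected range could then
install its own implementation instead of the default.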