Now that we have the event stream and SEV/SEVL implemented, we can finally enable WFET for AArch64.
To avoid issues with QEMU's incomplete ldst exclusive handling causing potential deadlocks in common WFE-enabled locking patterns, we take advantage of the architecture's flexibility and treat being in the exclusive region as a reason to exit. Reviewed-by: Richard Henderson <[email protected]> Signed-off-by: Alex Bennée <[email protected]> --- v2 - fix exception syndrome by using enum value - use env->halt_reason v3 - fix check_wfx_trap(s/false/true/) as it is a WFE v4 - defer expensive calculations until needed - treat cs->exclusive_addr as an IMPDEF WFE exit - update commit message v5 - use atomic_xchg to consume event_register --- target/arm/tcg/helper-defs.h | 1 + target/arm/tcg/op_helper.c | 94 ++++++++++++++++++++++++++++++++++ target/arm/tcg/translate-a64.c | 15 +++--- 3 files changed, 103 insertions(+), 7 deletions(-) diff --git a/target/arm/tcg/helper-defs.h b/target/arm/tcg/helper-defs.h index 99ebd754942..0077aeb4e22 100644 --- a/target/arm/tcg/helper-defs.h +++ b/target/arm/tcg/helper-defs.h @@ -56,6 +56,7 @@ DEF_HELPER_1(setend, void, env) DEF_HELPER_2(wfi, void, env, i32) DEF_HELPER_2(wfe, void, env, i32) DEF_HELPER_2(wfit, void, env, i32) +DEF_HELPER_2(wfet, void, env, i32) DEF_HELPER_1(yield, void, env) DEF_HELPER_1(pre_hvc, void, env) DEF_HELPER_2(pre_smc, void, env, i32) diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c index 3321e29898d..c4433be2ed0 100644 --- a/target/arm/tcg/op_helper.c +++ b/target/arm/tcg/op_helper.c @@ -641,6 +641,100 @@ void HELPER(wfe)(CPUARMState *env, uint32_t insn_len) #endif } +void HELPER(wfet)(CPUARMState *env, uint32_t rd) +{ +#ifdef CONFIG_USER_ONLY + /* + * As for WFIT make it NOP here, because trying to raise EXCP_HLT + * would trigger an abort. 
+ */ + return; +#else + CPUState *cs = env_cpu(env); + uint32_t excp; + int target_el; + ARMCPU *cpu; + uint64_t cntval, timeout, offset, cntvct, nexttick; + int64_t next_event; + + /* + * As for WFE if the event register is already set we can consume + * the event and return immediately. + */ + if (qatomic_xchg(&env->event_register, false)) { + return; + } + + /* + * Don't bother to go into our "low power state" if + * we would just wake up immediately. + * + * We want the value that we would get if we read CNTVCT_EL0 from + * the current exception level, so the direct_access offset, not + * the indirect_access one. Compare the pseudocode LocalTimeoutEvent(), + * which calls VirtualCounterTimer(). + */ + cntval = gt_get_countervalue(env); + offset = gt_direct_access_timer_offset(env, GTIMER_VIRT); + cntvct = cntval - offset; + timeout = env->xregs[rd]; + if (cpu_has_work(cs) || cntvct >= timeout) { + return; + } + + /* We might sleep, so now we check to see if we should trap */ + target_el = check_wfx_trap(env, true, &excp); + if (target_el) { + env->pc -= 4; + raise_exception(env, excp, syn_wfx(1, 0xe, rd, true, WFET, false), target_el); + } + + /* + * If the CPU has entered the exclusive region we could sleep + * until the global monitor moves from Exclusive to Open Access. + * However it would be expensive for QEMU to fully model the + * global monitor and not doing so would potentially trigger + * deadlocks in WFE enabled locking code. However as WFE is a hint + * instruction the architecture allows for the PE to leave + * low-power state for any reason. QEMU chooses to treat being in + * an exclusive region as such and return directly. + */ + if (env->exclusive_addr != -1) { + return; + } + + /* + * Finally work out if the timeout or event stream will kick in + * earlier. + * + * The WFET should time out when CNTVCT_EL0 >= the specified value. 
+ */ + cpu = env_archcpu(env); + if (uadd64_overflow(timeout, offset, &nexttick)) { + nexttick = UINT64_MAX; + } + if (nexttick > INT64_MAX / gt_cntfrq_period_ns(cpu)) { + nexttick = INT64_MAX; + } + + next_event = gt_calc_next_event_stream(env); + if (next_event > 0 && next_event < nexttick) { + timer_mod(cpu->wfxt_timer, next_event); + } else { + if (nexttick == INT64_MAX) { + timer_mod_ns(cpu->wfxt_timer, INT64_MAX); + } else { + timer_mod(cpu->wfxt_timer, nexttick); + } + } + + env->halt_reason = HALT_WFE; + cs->exception_index = EXCP_HLT; + cs->halted = 1; + cpu_loop_exit(cs); +#endif +} + void HELPER(yield)(CPUARMState *env) { CPUState *cs = env_cpu(env); diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index b45aac6d269..4a24e0a7fa0 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -2195,14 +2195,15 @@ static bool trans_WFET(DisasContext *s, arg_WFET *a) return false; } - /* - * We rely here on our WFE implementation being a NOP, so we - * don't need to do anything different to handle the WFET timeout - * from what trans_WFE does. - */ - if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) { - s->base.is_jmp = DISAS_WFE; + if (s->ss_active) { + /* Act like a NOP under architectural singlestep */ + return true; } + + gen_a64_update_pc(s, 4); + gen_helper_wfet(tcg_env, tcg_constant_i32(a->rd)); + /* Go back to the main loop to check for interrupts */ + s->base.is_jmp = DISAS_EXIT; return true; } -- 2.47.3
