Ashish Anand <[email protected]> writes:
> Currently, QEMU implements the 'Wait For Event' (WFE) instruction as a
> simple yield. This causes high host CPU usage because guest
> RTOS idle loops effectively become busy-wait loops.
>
> To improve efficiency, this patch transitions WFE to use the architectural
> 'Halt' state (EXCP_HLT) for M-profile CPUs. This allows the host thread
> to sleep when the guest is idle.
>
> To support this transition, we implement the full architectural behavior
> required for WFE, specifically the 'Event Register', 'SEVONPEND' logic,
> and 'R_BPBR' exception handling requirements defined in the ARM
> Architecture Reference Manual.
>
> This patch enables resource-efficient idle emulation for Cortex-M.
>
> Signed-off-by: Ashish Anand <[email protected]>
> ---
> Changes in v2:
> 1. Added R_BPBR handling as required by the Armv8-M architecture.
> 2. Unified pending-state updates to detect interrupt transitions for
> SEVONPEND.
> 3. Moved the Event Register to the top-level ARM CPU state, since it is
> architecturally shared between A-profile and M-profile CPUs.
> 4. Updated migration handling to be profile-agnostic so the state can be
> reused if A-profile WFE support is implemented in the future.
> 5. Added CONFIG_USER_ONLY handling for WFE/SEV and tightened profile checks.
> 6. Addressed review comments and fixes raised during v1 discussion.
>
> Link to v1 -
> https://lore.kernel.org/qemu-devel/[email protected]/
>
>
<snip>
>
> + /*
> + * The event register is shared by all ARM profiles (A/R/M),
> + * so it is stored in the top-level CPU state.
> + * WFE/SEV handling is currently implemented only for M-profile.
> + */
> + uint32_t event_register;
> +
Looking at the Arm ARM for armv8m it says:
The Event register is a single-bit register for each PE in the system.
Software cannot read, and cannot write to, the Event register directly.
So I wonder if this could simply be a bool - and in that case some of
the logic simplifies.
> /* Fields up to this point are cleared by a CPU reset */
> struct {} end_reset_fields;
>
> diff --git a/target/arm/machine.c b/target/arm/machine.c
> index 0befdb0b28..814a74745f 100644
> --- a/target/arm/machine.c
> +++ b/target/arm/machine.c
> @@ -508,6 +508,24 @@ static const VMStateDescription vmstate_m_mve = {
> },
> };
>
> +static bool event_needed(void *opaque)
> +{
> + ARMCPU *cpu = opaque;
> + /* Only save the state if the event register is set (non-zero) */
> + return cpu->env.event_register != 0;
> +}
> +
> +static const VMStateDescription vmstate_event = {
> + .name = "cpu/event",
> + .version_id = 1,
> + .minimum_version_id = 1,
> + .needed = event_needed,
> + .fields = (const VMStateField[]) {
> + VMSTATE_UINT32(env.event_register, ARMCPU),
> + VMSTATE_END_OF_LIST()
> + }
> +};
> +
> static const VMStateDescription vmstate_m = {
> .name = "cpu/m",
> .version_id = 4,
> @@ -1210,6 +1228,7 @@ const VMStateDescription vmstate_arm_cpu = {
> &vmstate_wfxt_timer,
> &vmstate_syndrome64,
> &vmstate_pstate64,
> + &vmstate_event,
> NULL
> }
> };
> diff --git a/target/arm/tcg/helper.h b/target/arm/tcg/helper.h
> index 4636d1bc03..5a10a9fba3 100644
> --- a/target/arm/tcg/helper.h
> +++ b/target/arm/tcg/helper.h
> @@ -60,6 +60,7 @@ DEF_HELPER_1(yield, void, env)
> DEF_HELPER_1(pre_hvc, void, env)
> DEF_HELPER_2(pre_smc, void, env, i32)
> DEF_HELPER_1(vesb, void, env)
> +DEF_HELPER_1(sev, void, env)
>
> DEF_HELPER_3(cpsr_write, void, env, i32, i32)
> DEF_HELPER_2(cpsr_write_eret, void, env, i32)
> diff --git a/target/arm/tcg/m_helper.c b/target/arm/tcg/m_helper.c
> index 3fb24c7790..2a2224c996 100644
> --- a/target/arm/tcg/m_helper.c
> +++ b/target/arm/tcg/m_helper.c
> @@ -962,7 +962,9 @@ static void v7m_exception_taken(ARMCPU *cpu, uint32_t lr,
> bool dotailchain,
> * Now we've done everything that might cause a derived exception
> * we can go ahead and activate whichever exception we're going to
> * take (which might now be the derived exception).
> + * Exception entry sets the event register (ARM ARM R_BPBR)
> */
> + env->event_register = 1;
> armv7m_nvic_acknowledge_irq(env->nvic);
>
> /* Switch to target security state -- must do this before writing SPSEL
> */
> @@ -1906,6 +1908,9 @@ static void do_v7m_exception_exit(ARMCPU *cpu)
> /* Otherwise, we have a successful exception exit. */
> arm_clear_exclusive(env);
> arm_rebuild_hflags(env);
> +
> + /* Exception return sets the event register (ARM ARM R_BPBR) */
> + env->event_register = 1;
> qemu_log_mask(CPU_LOG_INT, "...successful exception return\n");
> }
>
> diff --git a/target/arm/tcg/op_helper.c b/target/arm/tcg/op_helper.c
> index 4fbd219555..9f385dae6d 100644
> --- a/target/arm/tcg/op_helper.c
> +++ b/target/arm/tcg/op_helper.c
> @@ -469,16 +469,58 @@ void HELPER(wfit)(CPUARMState *env, uint64_t timeout)
> #endif
> }
>
> +void HELPER(sev)(CPUARMState *env)
> +{
> + CPUState *cs = env_cpu(env);
> + CPU_FOREACH(cs) {
> + ARMCPU *target_cpu = ARM_CPU(cs);
> + if (arm_feature(&target_cpu->env, ARM_FEATURE_M)) {
> + target_cpu->env.event_register = 1;
> + }
> + if (!qemu_cpu_is_self(cs)) {
> + qemu_cpu_kick(cs);
> + }
> + }
> +}
> +
> void HELPER(wfe)(CPUARMState *env)
> {
> - /* This is a hint instruction that is semantically different
> - * from YIELD even though we currently implement it identically.
> - * Don't actually halt the CPU, just yield back to top
> - * level loop. This is not going into a "low power state"
> - * (ie halting until some event occurs), so we never take
> - * a configurable trap to a different exception level.
> +#ifdef CONFIG_USER_ONLY
> + /*
> + * WFE in the user-mode emulator is a NOP. Real-world user-mode code
> + * shouldn't execute WFE, but if it does, we make it a NOP rather than
> + * aborting when we try to raise EXCP_HLT.
> + */
> + return;
> +#else
> + /*
> + * WFE (Wait For Event) is a hint instruction.
> + * For Cortex-M (M-profile), we implement the strict architectural
> behavior:
> + * 1. Check the Event Register (set by SEV or SEVONPEND).
> + * 2. If set, clear it and continue (consume the event).
> */
> - HELPER(yield)(env);
> + if (arm_feature(env, ARM_FEATURE_M)) {
> + CPUState *cs = env_cpu(env);
> +
> + if (env->event_register) {
> + env->event_register = 0;
> + return;
> + }
> +
> + cs->exception_index = EXCP_HLT;
> + cs->halted = 1;
> + cpu_loop_exit(cs);
> + } else {
> + /*
> + * For A-profile and others, we rely on the existing "yield"
> behavior.
> + * Don't actually halt the CPU, just yield back to top
> + * level loop. This is not going into a "low power state"
> + * (ie halting until some event occurs), so we never take
> + * a configurable trap to a different exception level
> + */
> + HELPER(yield)(env);
> + }
> +#endif
> }
>
> void HELPER(yield)(CPUARMState *env)
> diff --git a/target/arm/tcg/t16.decode b/target/arm/tcg/t16.decode
> index 646c74929d..778fbf1627 100644
> --- a/target/arm/tcg/t16.decode
> +++ b/target/arm/tcg/t16.decode
> @@ -228,8 +228,9 @@ REVSH 1011 1010 11 ... ... @rdm
> WFE 1011 1111 0010 0000
> WFI 1011 1111 0011 0000
>
> - # TODO: Implement SEV, SEVL; may help SMP performance.
> - # SEV 1011 1111 0100 0000
> + # M-profile SEV is implemented.
> + # TODO: Implement SEV for other profiles, and SEVL for all profiles; may
> help SMP performance.
> + SEV 1011 1111 0100 0000
> # SEVL 1011 1111 0101 0000
>
> # The canonical nop has the second nibble as 0000, but the whole of the
> diff --git a/target/arm/tcg/t32.decode b/target/arm/tcg/t32.decode
> index d327178829..49b8d0037e 100644
> --- a/target/arm/tcg/t32.decode
> +++ b/target/arm/tcg/t32.decode
> @@ -369,8 +369,9 @@ CLZ 1111 1010 1011 ---- 1111 .... 1000 ....
> @rdm
> WFE 1111 0011 1010 1111 1000 0000 0000 0010
> WFI 1111 0011 1010 1111 1000 0000 0000 0011
>
> - # TODO: Implement SEV, SEVL; may help SMP performance.
> - # SEV 1111 0011 1010 1111 1000 0000 0000 0100
> + # M-profile SEV is implemented.
> + # TODO: Implement SEV for other profiles, and SEVL for all profiles;
> may help SMP performance.
> + SEV 1111 0011 1010 1111 1000 0000 0000 0100
> # SEVL 1111 0011 1010 1111 1000 0000 0000 0101
>
> ESB 1111 0011 1010 1111 1000 0000 0001 0000
> diff --git a/target/arm/tcg/translate.c b/target/arm/tcg/translate.c
> index 63735d9789..c90b0106f7 100644
> --- a/target/arm/tcg/translate.c
> +++ b/target/arm/tcg/translate.c
> @@ -3241,14 +3241,30 @@ static bool trans_YIELD(DisasContext *s, arg_YIELD *a)
> return true;
> }
>
> +static bool trans_SEV(DisasContext *s, arg_SEV *a)
> +{
> + /*
> + * Currently SEV is a NOP for non-M-profile and in user-mode emulation.
> + * For system-mode M-profile, it sets the event register.
> + */
> +#ifndef CONFIG_USER_ONLY
> + if (arm_dc_feature(s, ARM_FEATURE_M)) {
> + gen_helper_sev(tcg_env);
> + }
> +#endif
> + return true;
> +}
> +
> static bool trans_WFE(DisasContext *s, arg_WFE *a)
> {
> /*
> * When running single-threaded TCG code, use the helper to ensure that
> - * the next round-robin scheduled vCPU gets a crack. In MTTCG mode we
> - * just skip this instruction. Currently the SEV/SEVL instructions,
> - * which are *one* of many ways to wake the CPU from WFE, are not
> - * implemented so we can't sleep like WFI does.
> + * the next round-robin scheduled vCPU gets a crack.
> + *
> + * For Cortex-M, we implement the architectural WFE behavior (sleeping
> + * until an event occurs or the Event Register is set).
> + * For other profiles, we currently treat this as a NOP or yield,
> + * to preserve existing performance characteristics.
> */
> if (!(tb_cflags(s->base.tb) & CF_PARALLEL)) {
> gen_update_pc(s, curr_insn_len(s));
> @@ -6807,6 +6823,11 @@ static void arm_tr_tb_stop(DisasContextBase *dcbase,
> CPUState *cpu)
> break;
> case DISAS_WFE:
> gen_helper_wfe(tcg_env);
> + /*
> + * The helper can return if the event register is set, so we
> + * must go back to the main loop to check for events.
> + */
> + tcg_gen_exit_tb(NULL, 0);
> break;
> case DISAS_YIELD:
> gen_helper_yield(tcg_env);
--
Alex Bennée
Virtualisation Tech Lead @ Linaro