> On 27. Apr 2026, at 21:55, Scott J. Goldman <[email protected]> wrote:
> 
> Commit b5f8f77271 ("accel/hvf: Implement WFI without using pselect()")
> changed hvf_wfi() from blocking the vCPU thread with pselect() to
> returning EXCP_HLT, intending QEMU's main event loop to handle the
> idle wait. However, cpu->halted was never set, so cpu_thread_is_idle()
> always returns false and the vCPU thread spins at 100% CPU per core
> while the guest is idle.
> 
> Fix this by:
> 
> 1. Setting cpu->halted = 1 in hvf_wfi() so the vCPU thread sleeps on
>  halt_cond in qemu_process_cpu_events().
> 
> 2. Arming a per-vCPU QEMU_CLOCK_VIRTUAL timer to fire when the guest's
>  virtual timer (CNTV_CVAL_EL0) would expire. This is necessary
>  because HVF only delivers HV_EXIT_REASON_VTIMER_ACTIVATED during
>  hv_vcpu_run(), which is not called while the CPU is halted. The
>  timer callback mirrors the VTIMER_ACTIVATED handler: it raises the
>  vtimer IRQ through the GIC and marks vtimer_masked, causing the
>  interrupt delivery chain to wake the vCPU via qemu_cpu_kick().
> 
> 3. Clearing cpu->halted in hvf_arch_vcpu_exec() when cpu_has_work()
>  indicates a pending interrupt, and cancelling the WFI timer.
> 
> 4. Re-arming the WFI timer from hvf_vm_state_change() on the resume
>  transition for any halted vCPU, since the QEMUTimer is per-instance
>  state and is not migrated. After cpu_synchronize_all_states() the
>  migrated vtimer state is mirrored in env, so we can read CNTV_CTL
>  and CNTV_CVAL from there. If the vtimer has already expired by the
>  time the destination resumes, hvf_wfi_timer_cb() is invoked
>  directly so the halted vCPU is woken up.
> 
> Fixes: b5f8f77271 ("accel/hvf: Implement WFI without using pselect()")
> Signed-off-by: Scott J. Goldman <[email protected]>
> ---
> Changes since v2:
> - Use QEMU_CLOCK_VIRTUAL instead of QEMU_CLOCK_HOST so the timer
>  pauses with the VM and a halted vCPU isn't woken (or its IRQ
>  raised) while the user has stopped the guest. (Peter)
> - Convert vtimer ticks to nanoseconds with muldiv64() to avoid
>  intermediate overflow. (Peter)
> - Re-arm the WFI timer from hvf_vm_state_change() on the resume
>  transition so a halted vCPU on the migration destination is
>  woken when its vtimer expires (the QEMUTimer is per-instance
>  state and isn't migrated). (Peter)
> v2: 
> https://lore.kernel.org/qemu-devel/[email protected]/
> v1: 
> https://lore.kernel.org/qemu-devel/[email protected]/

For QEMU 11.0 (for backporting to stable):

Reviewed-by: Mohamed Mediouni <[email protected]>

For QEMU 11.1:

Adding some checks for !hvf_irqchip_in_kernel() needed
but can do them on my side if you prefer.

Looks ready apart from that bit.

> 
> include/system/hvf_int.h |   1 +
> target/arm/hvf/hvf.c     | 124 ++++++++++++++++++++++++++++++++++++++-
> 2 files changed, 124 insertions(+), 1 deletion(-)
> 
> diff --git a/include/system/hvf_int.h b/include/system/hvf_int.h
> index 2621164cb2..58fb865eba 100644
> --- a/include/system/hvf_int.h
> +++ b/include/system/hvf_int.h
> @@ -48,6 +48,7 @@ struct AccelCPUState {
>    hv_vcpu_exit_t *exit;
>    bool vtimer_masked;
>    bool guest_debug_enabled;
> +    struct QEMUTimer *wfi_timer;
> #endif
> };
> 
> diff --git a/target/arm/hvf/hvf.c b/target/arm/hvf/hvf.c
> index 678afe5c8e..a19d7a5e1f 100644
> --- a/target/arm/hvf/hvf.c
> +++ b/target/arm/hvf/hvf.c
> @@ -28,6 +28,7 @@
> #include "hw/core/boards.h"
> #include "hw/core/irq.h"
> #include "qemu/main-loop.h"
> +#include "qemu/timer.h"
> #include "system/cpus.h"
> #include "arm-powerctl.h"
> #include "target/arm/cpu.h"
> @@ -301,6 +302,8 @@ void hvf_arm_init_debug(void)
> #define TMR_CTL_IMASK   (1 << 1)
> #define TMR_CTL_ISTATUS (1 << 2)
> 
> +static void hvf_wfi_timer_cb(void *opaque);
> +
> static uint32_t chosen_ipa_bit_size;
> 
> typedef struct HVFVTimer {
> @@ -1214,6 +1217,9 @@ void hvf_arch_vcpu_destroy(CPUState *cpu)
> {
>    hv_return_t ret;
> 
> +    timer_free(cpu->accel->wfi_timer);
> +    cpu->accel->wfi_timer = NULL;
> +
>    ret = hv_vcpu_destroy(cpu->accel->fd);
>    assert_hvf_ok(ret);
> }
> @@ -1352,6 +1358,9 @@ int hvf_arch_init_vcpu(CPUState *cpu)
>                              arm_cpu->isar.idregs[ID_AA64MMFR0_EL1_IDX]);
>    assert_hvf_ok(ret);
> 
> +    cpu->accel->wfi_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL,
> +                                          hvf_wfi_timer_cb, cpu);
> +
adding !hvf_irqchip_in_kernel()

>     aarch64_add_sme_properties(OBJECT(cpu));
>    return 0;
> }
> @@ -2027,8 +2036,67 @@ static uint64_t hvf_vtimer_val_raw(void)
>    return mach_absolute_time() - hvf_state->vtimer_offset;
> }
> 
> +static void hvf_wfi_timer_cb(void *opaque)
> +{
> +    CPUState *cpu = opaque;
> +    ARMCPU *arm_cpu = ARM_CPU(cpu);
> +
> +    /*
> +     * vtimer expired while the CPU was halted for WFI.
> +     * Mirror HV_EXIT_REASON_VTIMER_ACTIVATED: raise the vtimer
> +     * interrupt and mark as masked so hvf_sync_vtimer() will
> +     * check and unmask when the guest handles it.
> +     *
> +     * The interrupt delivery chain (GIC -> cpu_interrupt ->
> +     * qemu_cpu_kick) wakes the vCPU thread from halt_cond.
> +     */
> +    qemu_set_irq(arm_cpu->gt_timer_outputs[GTIMER_VIRT], 1);
> +    cpu->accel->vtimer_masked = true;
> +}
> +
> +/*
> + * Arm a host-side QEMU_CLOCK_VIRTUAL timer to fire when the guest's
> + * vtimer (CNTV_CVAL_EL0) is scheduled to expire. HVF only delivers
> + * HV_EXIT_REASON_VTIMER_ACTIVATED during hv_vcpu_run(), which we won't
> + * call while the vCPU is halted, so we need this to wake the vCPU.
> + *
> + * QEMU_CLOCK_VIRTUAL pauses while the VM is stopped, which keeps the
> + * timer in lockstep with the guest's view of vtime across pause/resume.
> + *
> + * Caller must supply the current CNTV_CTL_EL0 and CNTV_CVAL_EL0 values,
> + * since the appropriate source (HVF vs. env) depends on context.
> + *
> + * Returns 0 if the timer was armed (or if the vtimer is disabled/masked
> + * and the vCPU should still halt waiting on another event), or -1 if
> + * the vtimer has already expired.
> + */
> +static int hvf_arm_wfi_timer(CPUState *cpu, uint64_t ctl, uint64_t cval)
> +{
> +    ARMCPU *arm_cpu = ARM_CPU(cpu);
> +    uint64_t now;
> +    int64_t delta_ns;
> +
> +    if (!(ctl & TMR_CTL_ENABLE) || (ctl & TMR_CTL_IMASK)) {
> +        return 0;
> +    }
> +
> +    now = hvf_vtimer_val_raw();
> +    if (cval <= now) {
> +        return -1;
> +    }
> +
> +    delta_ns = muldiv64(cval - now, NANOSECONDS_PER_SECOND,
> +                        arm_cpu->gt_cntfrq_hz);
> +    timer_mod(cpu->accel->wfi_timer,
> +              qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) + delta_ns);
> +    return 0;
> +}
> +
> static int hvf_wfi(CPUState *cpu)
> {
> +    uint64_t ctl, cval;
> +    hv_return_t r;
> +
>    if (cpu_has_work(cpu)) {
>        /*
>         * Don't bother to go into our "low power state" if
> @@ -2037,6 +2105,22 @@ static int hvf_wfi(CPUState *cpu)
>        return 0;
>    }
> 
> +    /*
> +     * Read the vtimer state directly from HVF. We're on the vCPU thread,
> +     * just exited from hv_vcpu_run(), so HVF holds the authoritative
> +     * values and env may be stale.
> +     */
> +    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CTL_EL0, &ctl);
> +    assert_hvf_ok(r);
> +    r = hv_vcpu_get_sys_reg(cpu->accel->fd, HV_SYS_REG_CNTV_CVAL_EL0, &cval);
> +    assert_hvf_ok(r);
> +
> +    if (hvf_arm_wfi_timer(cpu, ctl, cval) < 0) {
> +        /* vtimer already expired, don't halt */
> +        return 0;
> +    }
> +
> +    cpu->halted = 1;
>    return EXCP_HLT;
> }
> 
> @@ -2332,7 +2416,11 @@ int hvf_arch_vcpu_exec(CPUState *cpu)
>    hv_return_t r;
> 
>    if (cpu->halted) {
> -        return EXCP_HLT;
> +        if (!cpu_has_work(cpu)) {
> +            return EXCP_HLT;
> +        }
> +        cpu->halted = 0;
> +        timer_del(cpu->accel->wfi_timer);
!hvf_irqchip_in_kernel(), we shouldn’t have the wfi timer otherwise
>     }
> 
>    flush_cpu_state(cpu);
> @@ -2376,11 +2464,45 @@ static const VMStateDescription vmstate_hvf_vtimer = {
> static void hvf_vm_state_change(void *opaque, bool running, RunState state)
> {
>    HVFVTimer *s = opaque;
> +    CPUState *cpu;
> 
>    if (running) {
>        /* Update vtimer offset on all CPUs */
>        hvf_state->vtimer_offset = mach_absolute_time() - s->vtimer_val;
>        cpu_synchronize_all_states();
> +
> +        /*
> +         * After migration restore (or any resume), the wfi_timer is not
> +         * scheduled on this QEMU instance, so re-arm it for any halted
> +         * vCPU with a pending vtimer. For a non-migration resume the
> +         * QEMU_CLOCK_VIRTUAL timer was already scheduled; recomputing the
> +         * deadline produces the same value and is a harmless no-op.
> +         *
> +         * cpu_synchronize_all_states() above ensures env mirrors the
> +         * authoritative vtimer state (whether that came from HVF or from
> +         * the migration stream), so we can safely read it here from the
> +         * iothread.
> +         */

This should be gated behind !hvf_irqchip_in_kernel()

> +        CPU_FOREACH(cpu) {
> +            ARMCPU *arm_cpu;
> +            uint64_t ctl, cval;
> +
> +            if (!cpu->accel || !cpu->halted) {
> +                continue;
> +            }
> +
> +            arm_cpu = ARM_CPU(cpu);
> +            ctl = arm_cpu->env.cp15.c14_timer[GTIMER_VIRT].ctl;
> +            cval = arm_cpu->env.cp15.c14_timer[GTIMER_VIRT].cval;
> +
> +            if (hvf_arm_wfi_timer(cpu, ctl, cval) < 0) {
> +                /*
> +                 * vtimer already expired while we were paused; raise the
> +                 * IRQ now so the halted vCPU wakes up.
> +                 */
> +                hvf_wfi_timer_cb(cpu);
> +            }
> +        }
>    } else {
>        /* Remember vtimer value on every pause */
>        s->vtimer_val = hvf_vtimer_val_raw();
> -- 
> 2.50.1 (Apple Git-155)
> 
> 


Reply via email to