This patch implements VCPU interrupts and requests which are both asynchronous events.
The VCPU interrupts can be set/unset using KVM_INTERRUPT ioctl from user-space. In future, the in-kernel IRQCHIP emulation will use kvm_riscv_vcpu_set_interrupt() and kvm_riscv_vcpu_unset_interrupt() functions to set/unset VCPU interrupts. Important VCPU requests implemented by this patch are: KVM_REQ_SLEEP - set whenever VCPU itself goes to sleep state KVM_REQ_VCPU_RESET - set whenever VCPU reset is requested The WFI trap-n-emulate (added later) will use KVM_REQ_SLEEP request and kvm_riscv_vcpu_has_interrupt() function. The KVM_REQ_VCPU_RESET request will be used by SBI emulation (added later) to power-up a VCPU in power-off state. The user-space can use the GET_MPSTATE/SET_MPSTATE ioctls to get/set power state of a VCPU. Signed-off-by: Anup Patel <anup.pa...@wdc.com> Acked-by: Paolo Bonzini <pbonz...@redhat.com> Reviewed-by: Paolo Bonzini <pbonz...@redhat.com> Reviewed-by: Alexander Graf <g...@amazon.com> --- arch/riscv/include/asm/kvm_host.h | 26 ++++ arch/riscv/include/uapi/asm/kvm.h | 3 + arch/riscv/kvm/main.c | 8 ++ arch/riscv/kvm/vcpu.c | 193 ++++++++++++++++++++++++++++-- 4 files changed, 217 insertions(+), 13 deletions(-) diff --git a/arch/riscv/include/asm/kvm_host.h b/arch/riscv/include/asm/kvm_host.h index dab32c9c3470..d801216da6d0 100644 --- a/arch/riscv/include/asm/kvm_host.h +++ b/arch/riscv/include/asm/kvm_host.h @@ -122,6 +122,21 @@ struct kvm_vcpu_arch { /* CPU CSR context upon Guest VCPU reset */ struct kvm_vcpu_csr guest_reset_csr; + /* + * VCPU interrupts + * + * We have a lockless approach for tracking pending VCPU interrupts + * implemented using atomic bitops. The irqs_pending bitmap represent + * pending interrupts whereas irqs_pending_mask represent bits changed + * in irqs_pending. Our approach is modeled around multiple producer + * and single consumer problem where the consumer is the VCPU itself. + */ + unsigned long irqs_pending; + unsigned long irqs_pending_mask; + + /* VCPU power-off state */ + bool power_off; + /* Don't run the VCPU (blocked) */ bool pause; @@ -135,6 +150,9 @@ static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_vcpu_block_finish(struct kvm_vcpu *vcpu) {} +int kvm_riscv_setup_vsip(void); +void kvm_riscv_cleanup_vsip(void); + void kvm_riscv_stage2_flush_cache(struct kvm_vcpu *vcpu); int kvm_riscv_stage2_alloc_pgd(struct kvm *kvm); void kvm_riscv_stage2_free_pgd(struct kvm *kvm); @@ -146,4 +164,12 @@ int kvm_riscv_vcpu_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, static inline void __kvm_riscv_switch_to(struct kvm_vcpu_arch *vcpu_arch) {} +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq); +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu); +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu); +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu); + #endif /* __RISCV_KVM_HOST_H__ */ diff --git a/arch/riscv/include/uapi/asm/kvm.h b/arch/riscv/include/uapi/asm/kvm.h index d15875818b6e..6dbc056d58ba 100644 --- a/arch/riscv/include/uapi/asm/kvm.h +++ b/arch/riscv/include/uapi/asm/kvm.h @@ -18,6 +18,9 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 +#define KVM_INTERRUPT_SET -1U +#define KVM_INTERRUPT_UNSET -2U + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { }; diff --git a/arch/riscv/kvm/main.c b/arch/riscv/kvm/main.c index e1ffe6d42f39..d088247843c5 100644 --- a/arch/riscv/kvm/main.c +++ b/arch/riscv/kvm/main.c @@ -48,6 +48,8 @@ int kvm_arch_hardware_enable(void) hideleg |= SIE_SEIE; csr_write(CSR_HIDELEG, hideleg); + csr_write(CSR_VSIP, 0); + return 0; } @@ -59,11 +61,17 @@ void kvm_arch_hardware_disable(void) int kvm_arch_init(void *opaque) { + int ret; + if (!riscv_isa_extension_available(NULL, h)) { kvm_info("hypervisor extension not available\n"); return -ENODEV; } + ret = kvm_riscv_setup_vsip(); + if (ret) + return ret; + kvm_info("hypervisor extension available\n"); return 0; diff --git a/arch/riscv/kvm/vcpu.c b/arch/riscv/kvm/vcpu.c index 8272b05d6ce4..3223f723f79e 100644 --- a/arch/riscv/kvm/vcpu.c +++ b/arch/riscv/kvm/vcpu.c @@ -11,6 +11,7 @@ #include <linux/err.h> #include <linux/kdebug.h> #include <linux/module.h> +#include <linux/percpu.h> #include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/sched/signal.h> @@ -40,6 +41,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { riscv_isa_extension_mask(s) | \ riscv_isa_extension_mask(u)) +static unsigned long __percpu *vsip_shadow; + static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; @@ -50,6 +53,9 @@ static void kvm_riscv_reset_vcpu(struct kvm_vcpu *vcpu) memcpy(csr, reset_csr, sizeof(*csr)); memcpy(cntx, reset_cntx, sizeof(*cntx)); + + WRITE_ONCE(vcpu->arch.irqs_pending, 0); + WRITE_ONCE(vcpu->arch.irqs_pending_mask, 0); } struct kvm_vcpu *kvm_arch_vcpu_create(struct kvm *kvm, unsigned int id) @@ -116,8 +122,8 @@ void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu) int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return READ_ONCE(vcpu->arch.irqs_pending) & + vcpu->arch.guest_csr.vsie & (1UL << IRQ_S_TIMER); } void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) @@ -130,20 +136,18 @@ void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return (kvm_riscv_vcpu_has_interrupt(vcpu) && + !vcpu->arch.power_off && !vcpu->arch.pause); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) { - /* TODO: */ - return 0; + return kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE; } bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu) { - /* TODO: */ - return false; + return (vcpu->arch.guest_context.sstatus & SR_SPP) ? true : false; } bool kvm_arch_has_vcpu_debugfs(void) @@ -164,7 +168,21 @@ vm_fault_t kvm_arch_vcpu_fault(struct kvm_vcpu *vcpu, struct vm_fault *vmf) long kvm_arch_vcpu_async_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { - /* TODO; */ + struct kvm_vcpu *vcpu = filp->private_data; + void __user *argp = (void __user *)arg; + + if (ioctl == KVM_INTERRUPT) { + struct kvm_interrupt irq; + + if (copy_from_user(&irq, argp, sizeof(irq))) + return -EFAULT; + + if (irq.irq == KVM_INTERRUPT_SET) + return kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_EXT); + else + return kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_EXT); + } + return -ENOIOCTLCMD; } @@ -213,18 +231,111 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs) return -EINVAL; } +void kvm_riscv_vcpu_flush_interrupts(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long mask, val; + + if (READ_ONCE(vcpu->arch.irqs_pending_mask)) { + mask = xchg_acquire(&vcpu->arch.irqs_pending_mask, 0); + val = READ_ONCE(vcpu->arch.irqs_pending) & mask; + + csr->vsip &= ~mask; + csr->vsip |= val; + } +} + +void kvm_riscv_vcpu_sync_interrupts(struct kvm_vcpu *vcpu) +{ + vcpu->arch.guest_csr.vsip = csr_read(CSR_VSIP); + vcpu->arch.guest_csr.vsie = csr_read(CSR_VSIE); + + /* Guest can directly update VSIP software interrupt bits */ + if (vcpu->arch.guest_csr.vsip ^ READ_ONCE(vcpu->arch.irqs_pending)) { + if (vcpu->arch.guest_csr.vsip & (1UL << IRQ_S_SOFT)) + kvm_riscv_vcpu_set_interrupt(vcpu, IRQ_S_SOFT); + else + kvm_riscv_vcpu_unset_interrupt(vcpu, IRQ_S_SOFT); + } +} + +int kvm_riscv_vcpu_set_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_S_SOFT && + irq != IRQ_S_TIMER && + irq != IRQ_S_EXT) + return -EINVAL; + + set_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + kvm_vcpu_kick(vcpu); + + return 0; +} + +int kvm_riscv_vcpu_unset_interrupt(struct kvm_vcpu *vcpu, unsigned int irq) +{ + if (irq != IRQ_S_SOFT && + irq != IRQ_S_TIMER && + irq != IRQ_S_EXT) + return -EINVAL; + + clear_bit(irq, &vcpu->arch.irqs_pending); + smp_mb__before_atomic(); + set_bit(irq, &vcpu->arch.irqs_pending_mask); + + return 0; +} + +bool kvm_riscv_vcpu_has_interrupt(struct kvm_vcpu *vcpu) +{ + return (READ_ONCE(vcpu->arch.irqs_pending) & + vcpu->arch.guest_csr.vsie) ? true : false; +} + +void kvm_riscv_vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + +void kvm_riscv_vcpu_power_on(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = false; + kvm_vcpu_wake_up(vcpu); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - /* TODO: */ + if (vcpu->arch.power_off) + mp_state->mp_state = KVM_MP_STATE_STOPPED; + else + mp_state->mp_state = KVM_MP_STATE_RUNNABLE; + return 0; } int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { - /* TODO: */ - return 0; + int ret = 0; + + switch (mp_state->mp_state) { + case KVM_MP_STATE_RUNNABLE: + vcpu->arch.power_off = false; + break; + case KVM_MP_STATE_STOPPED: + kvm_riscv_vcpu_power_off(vcpu); + break; + default: + ret = -EINVAL; + } + + return ret; } int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, @@ -248,7 +359,51 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static void kvm_riscv_check_vcpu_requests(struct kvm_vcpu *vcpu) { - /* TODO: */ + struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); + + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) { + swait_event_interruptible_exclusive(*wq, + ((!vcpu->arch.power_off) && + (!vcpu->arch.pause))); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* + * Awaken to handle a signal, request to + * sleep again later. + */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } + } + + if (kvm_check_request(KVM_REQ_VCPU_RESET, vcpu)) + kvm_riscv_reset_vcpu(vcpu); + } +} + +static void kvm_riscv_update_vsip(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu_csr *csr = &vcpu->arch.guest_csr; + unsigned long *vsip = raw_cpu_ptr(vsip_shadow); + + if (*vsip != csr->vsip) { + csr_write(CSR_VSIP, csr->vsip); + *vsip = csr->vsip; + } +} + +int kvm_riscv_setup_vsip(void) +{ + vsip_shadow = alloc_percpu(unsigned long); + if (!vsip_shadow) + return -ENOMEM; + + return 0; +} + +void kvm_riscv_cleanup_vsip(void) +{ + free_percpu(vsip_shadow); } int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) @@ -311,6 +466,15 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) srcu_read_unlock(&vcpu->kvm->srcu, vcpu->arch.srcu_idx); smp_mb__after_srcu_read_unlock(); + /* + * We might have got VCPU interrupts updated asynchronously + * so update it in HW. + */ + kvm_riscv_vcpu_flush_interrupts(vcpu); + + /* Update VSIP CSR for current CPU */ + kvm_riscv_update_vsip(vcpu); + if (ret <= 0 || kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; @@ -334,6 +498,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) scause = csr_read(CSR_SCAUSE); stval = csr_read(CSR_STVAL); + /* Syncup interrupts state with HW */ + kvm_riscv_vcpu_sync_interrupts(vcpu); + /* * We may have taken a host interrupt in VS/VU-mode (i.e. * while executing the guest). This interrupt is still -- 2.17.1