From: David Woodhouse <[email protected]>

Add a per-VM capability to allow userspace to disable WFI and/or WFE
trapping, modelled after x86's KVM_CAP_X86_DISABLE_EXITS. When the
corresponding flag is set, the trap is unconditionally cleared
regardless of the global kvm-arm.wf{i,e}_trap_policy setting.

The existing kernel command line parameters provide a system-wide
override, but a per-VM capability allows the VMM to make the decision
per guest.

This is useful for hypervisors running a mix of dedicated instances with
pinned vCPUs, which want to avoid the cost of trapping WFI/WFE, and
overcommitted floating instances, for which trapping remains necessary.

As with the x86 equivalent, KVM_CHECK_EXTENSION returns the bitmask of
supported exit disables.

Signed-off-by: David Woodhouse <[email protected]>
---
 Documentation/virt/kvm/api.rst    | 28 ++++++++++++++++++++++++++++
 arch/arm64/include/asm/kvm_host.h |  4 ++++
 arch/arm64/kvm/arm.c              | 20 ++++++++++++++++++++
 include/uapi/linux/kvm.h          |  6 ++++++
 4 files changed, 58 insertions(+)

diff --git a/Documentation/virt/kvm/api.rst b/Documentation/virt/kvm/api.rst
index 032516783e96..e3b3bd9edeec 100644
--- a/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@ -8902,6 +8902,34 @@ helpful if user space wants to emulate instructions which are not
 This capability can be enabled dynamically even if VCPUs were already
 created and are running.
 
+7.47 KVM_CAP_ARM_DISABLE_EXITS
+------------------------------
+
+:Architecture: arm64
+:Target: VM
+:Parameters: args[0] is a bitmask of exits to disable
+:Returns: 0 on success, -EINVAL if unsupported bits are set.
+
+Valid bits in args[0]:
+
+ - ``KVM_ARM_DISABLE_EXITS_WFI``: Disable trapping of WFI (Wait For
+   Interrupt) instructions. The guest WFI will execute natively instead
+   of causing a VM exit.
+
+ - ``KVM_ARM_DISABLE_EXITS_WFE``: Disable trapping of WFE (Wait For
+   Event) instructions. The guest WFE will execute natively instead of
+   causing a VM exit.
+
+When a bit is set, the corresponding trap is unconditionally cleared for
+all vCPUs in the VM, overriding the system-wide ``kvm-arm.wfi_trap_policy``
+and ``kvm-arm.wfe_trap_policy`` kernel parameters.
+
+Disabling exits is a one-way operation: once an exit type is disabled for
+a VM, it cannot be re-enabled. Calling this ioctl with args[0] = 0 is a
+no-op.
+
+``KVM_CHECK_EXTENSION`` returns the bitmask of exits that can be disabled.
+
 8. Other capabilities.
 ======================
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 70cb9cfd760a..a1bb025c641f 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -312,6 +312,10 @@ struct kvm_arch {
        size_t nested_mmus_size;
        int nested_mmus_next;
 
+       /* Per-VM WFI/WFE trap overrides; set via KVM_CAP_ARM_DISABLE_EXITS */
+       bool wfi_in_guest;
+       bool wfe_in_guest;
+
        /* Interrupt controller */
        struct vgic_dist        vgic;
 
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 410ffd41fd73..326a99fea753 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -178,6 +178,17 @@ int kvm_vm_ioctl_enable_cap(struct kvm *kvm,
                }
                mutex_unlock(&kvm->lock);
                break;
+       case KVM_CAP_ARM_DISABLE_EXITS:
+               if (cap->args[0] & ~KVM_ARM_DISABLE_VALID_EXITS) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (cap->args[0] & KVM_ARM_DISABLE_EXITS_WFI)
+                       kvm->arch.wfi_in_guest = true;
+               if (cap->args[0] & KVM_ARM_DISABLE_EXITS_WFE)
+                       kvm->arch.wfe_in_guest = true;
+               r = 0;
+               break;
        case KVM_CAP_ARM_SEA_TO_USER:
                r = 0;
                set_bit(KVM_ARCH_FLAG_EXIT_SEA, &kvm->arch.flags);
@@ -379,6 +390,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ARM_SEA_TO_USER:
                r = 1;
                break;
+       case KVM_CAP_ARM_DISABLE_EXITS:
+               r = KVM_ARM_DISABLE_VALID_EXITS;
+               break;
        case KVM_CAP_SET_GUEST_DEBUG2:
                return KVM_GUESTDBG_VALID_MASK;
        case KVM_CAP_ARM_SET_DEVICE_ADDR:
@@ -610,6 +624,9 @@ static void vcpu_set_pauth_traps(struct kvm_vcpu *vcpu)
 
 static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->kvm->arch.wfi_in_guest)
+               return true;
+
        if (unlikely(kvm_wfi_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
                return kvm_wfi_trap_policy == KVM_WFX_NOTRAP;
 
@@ -621,6 +638,9 @@ static bool kvm_vcpu_should_clear_twi(struct kvm_vcpu *vcpu)
 
 static bool kvm_vcpu_should_clear_twe(struct kvm_vcpu *vcpu)
 {
+       if (vcpu->kvm->arch.wfe_in_guest)
+               return true;
+
        if (unlikely(kvm_wfe_trap_policy != KVM_WFX_NOTRAP_SINGLE_TASK))
                return kvm_wfe_trap_policy == KVM_WFX_NOTRAP;
 
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 80364d4dbebb..694cf699ed0a 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -669,6 +669,11 @@ struct kvm_ioeventfd {
 #define KVM_X86_DISABLE_EXITS_CSTATE         (1 << 3)
 #define KVM_X86_DISABLE_EXITS_APERFMPERF     (1 << 4)
 
+#define KVM_ARM_DISABLE_EXITS_WFI            (1 << 0)
+#define KVM_ARM_DISABLE_EXITS_WFE            (1 << 1)
+#define KVM_ARM_DISABLE_VALID_EXITS          (KVM_ARM_DISABLE_EXITS_WFI | \
+                                             KVM_ARM_DISABLE_EXITS_WFE)
+
 /* for KVM_ENABLE_CAP */
 struct kvm_enable_cap {
        /* in */
@@ -989,6 +994,7 @@ struct kvm_enable_cap {
 #define KVM_CAP_ARM_SEA_TO_USER 245
 #define KVM_CAP_S390_USER_OPEREXEC 246
 #define KVM_CAP_S390_KEYOP 247
+#define KVM_CAP_ARM_DISABLE_EXITS 248
 
 struct kvm_irq_routing_irqchip {
        __u32 irqchip;
-- 
2.51.0


Reply via email to