The rVIC (reduced Virtual Interrupt Controller), and its rVID
(reduced Virtual Interrupt Distributor) companion are the two
parts of a PV interrupt controller architecture, aiming at supporting
VMs with minimal interrupt requirements.

Signed-off-by: Marc Zyngier <m...@kernel.org>
---
 arch/arm64/include/asm/kvm_host.h |    7 +-
 arch/arm64/include/asm/kvm_irq.h  |    2 +
 arch/arm64/include/uapi/asm/kvm.h |    9 +
 arch/arm64/kvm/Makefile           |    2 +-
 arch/arm64/kvm/arm.c              |    3 +
 arch/arm64/kvm/hypercalls.c       |    7 +
 arch/arm64/kvm/rvic-cpu.c         | 1073 +++++++++++++++++++++++++++++
 include/kvm/arm_rvic.h            |   41 ++
 include/linux/irqchip/irq-rvic.h  |    4 +
 include/uapi/linux/kvm.h          |    2 +
 10 files changed, 1148 insertions(+), 2 deletions(-)
 create mode 100644 arch/arm64/kvm/rvic-cpu.c
 create mode 100644 include/kvm/arm_rvic.h

diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 5dd92873d40f..381d3ff6e0b7 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -35,6 +35,7 @@
 #include <kvm/arm_vgic.h>
 #include <kvm/arm_arch_timer.h>
 #include <kvm/arm_pmu.h>
+#include <kvm/arm_rvic.h>
 
 #define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS
 
@@ -102,6 +103,7 @@ struct kvm_arch {
        enum kvm_irqchip_type   irqchip_type;
        bool                    irqchip_finalized;
        struct kvm_irqchip_flow irqchip_flow;
+       void                    *irqchip_data;
        struct vgic_dist        vgic;
 
        /* Mandated version of PSCI */
@@ -324,7 +326,10 @@ struct kvm_vcpu_arch {
        } host_debug_state;
 
        /* VGIC state */
-       struct vgic_cpu vgic_cpu;
+       union {
+               struct vgic_cpu vgic_cpu;
+               struct rvic rvic;
+       };
        struct arch_timer_cpu timer_cpu;
        struct kvm_pmu pmu;
 
diff --git a/arch/arm64/include/asm/kvm_irq.h b/arch/arm64/include/asm/kvm_irq.h
index 05fbe5241642..bb1666093f80 100644
--- a/arch/arm64/include/asm/kvm_irq.h
+++ b/arch/arm64/include/asm/kvm_irq.h
@@ -11,11 +11,13 @@ enum kvm_irqchip_type {
        IRQCHIP_USER,           /* Implemented in userspace */
        IRQCHIP_GICv2,          /* v2 on v2, or v2 on v3 */
        IRQCHIP_GICv3,          /* v3 on v3 */
+       IRQCHIP_RVIC,           /* PV irqchip */
 };
 
 #define irqchip_in_kernel(k)   ((k)->arch.irqchip_type != IRQCHIP_USER)
 #define irqchip_is_gic_v2(k)   ((k)->arch.irqchip_type == IRQCHIP_GICv2)
 #define irqchip_is_gic_v3(k)   ((k)->arch.irqchip_type == IRQCHIP_GICv3)
+#define irqchip_is_rvic(k)     ((k)->arch.irqchip_type == IRQCHIP_RVIC)
 
 #define irqchip_finalized(k)   ((k)->arch.irqchip_finalized)
 
diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h
index ba85bb23f060..9fc26c84903f 100644
--- a/arch/arm64/include/uapi/asm/kvm.h
+++ b/arch/arm64/include/uapi/asm/kvm.h
@@ -335,6 +335,15 @@ struct kvm_vcpu_events {
 #define KVM_ARM_VCPU_PVTIME_CTRL       2
 #define   KVM_ARM_VCPU_PVTIME_IPA      0
 
+/*
+ * Device Control API: ARM RVIC. We only use the group, not the group
+ * attributes. They must be set to 0 for now.
+ */
+#define KVM_DEV_ARM_RVIC_GRP_NR_IRQS   0
+#define   KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK 0xffff
+#define   KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK   (0xffff << 16)
+#define KVM_DEV_ARM_RVIC_GRP_INIT      1
+
 /* KVM_IRQ_LINE irq field index values */
 #define KVM_ARM_IRQ_VCPU2_SHIFT                28
 #define KVM_ARM_IRQ_VCPU2_MASK         0xf
diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
index 99977c1972cc..e378293ce99b 100644
--- a/arch/arm64/kvm/Makefile
+++ b/arch/arm64/kvm/Makefile
@@ -16,7 +16,7 @@ kvm-y := $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/eventfd.o \
         inject_fault.o regmap.o va_layout.o hyp.o handle_exit.o \
         guest.o debug.o reset.o sys_regs.o \
         vgic-sys-reg-v3.o fpsimd.o pmu.o \
-        aarch32.o arch_timer.o \
+        aarch32.o arch_timer.o rvic-cpu.o \
         vgic/vgic.o vgic/vgic-init.o \
         vgic/vgic-irqfd.o vgic/vgic-v2.o \
         vgic/vgic-v3.o vgic/vgic-v4.o \
diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c
index 0d4c8de27d1e..bf0b11bdce84 100644
--- a/arch/arm64/kvm/arm.c
+++ b/arch/arm64/kvm/arm.c
@@ -41,6 +41,7 @@
 #include <kvm/arm_hypercalls.h>
 #include <kvm/arm_pmu.h>
 #include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
 
 #ifdef REQUIRES_VIRT
 __asm__(".arch_extension       virt");
@@ -1402,6 +1403,8 @@ static int init_subsystems(void)
        switch (err) {
        case 0:
                vgic_present = true;
+               if (kvm_register_rvic_device())
+                       kvm_err("Failed to register rvic device type\n");
                break;
        case -ENODEV:
        case -ENXIO:
diff --git a/arch/arm64/kvm/hypercalls.c b/arch/arm64/kvm/hypercalls.c
index 550dfa3e53cd..f6620be74ce5 100644
--- a/arch/arm64/kvm/hypercalls.c
+++ b/arch/arm64/kvm/hypercalls.c
@@ -8,6 +8,9 @@
 
 #include <kvm/arm_hypercalls.h>
 #include <kvm/arm_psci.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
 
 int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
 {
@@ -62,6 +65,10 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
                if (gpa != GPA_INVALID)
                        val = gpa;
                break;
+       case SMC64_RVIC_BASE ... SMC64_RVIC_LAST:
+               return kvm_rvic_handle_hcall(vcpu);
+       case SMC64_RVID_BASE ... SMC64_RVID_LAST:
+               return kvm_rvid_handle_hcall(vcpu);
        default:
                return kvm_psci_call(vcpu);
        }
diff --git a/arch/arm64/kvm/rvic-cpu.c b/arch/arm64/kvm/rvic-cpu.c
new file mode 100644
index 000000000000..5fb200c637d9
--- /dev/null
+++ b/arch/arm64/kvm/rvic-cpu.c
@@ -0,0 +1,1073 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <m...@kernel.org>
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <kvm/arm_hypercalls.h>
+#include <kvm/arm_rvic.h>
+
+#include <linux/irqchip/irq-rvic.h>
+
+/* FIXME: lock/unlock_all_vcpus */
+#include "vgic/vgic.h"
+
+#define kvm_vcpu_to_rvic(v)    (&(v)->arch.rvic)
+#define kvm_rvic_to_vcpu(r)    (container_of((r), struct kvm_vcpu, arch.rvic))
+
+#define rvic_nr_untrusted(r)   ((r)->nr_total - (r)->nr_trusted)
+
+struct rvic_vm_data {
+       u16             nr_trusted;
+       u16             nr_total;
+       spinlock_t      lock;
+       /* Map is a dynamically allocated array of (total-trusted) elements */
+       struct {
+               u16     target_vcpu;
+               u16     intid;
+       } rvid_map[];
+};
+
+/*
+ * rvic_irq state machine:
+ *
+ * idle <- S/C -> pending
+ *  ^          /    ^
+ *  |         /     |
+ * U/M       A     U/M
+ *  |       /       |
+ *  v     v         V
+ * masked <- S/C -> masked+pending
+ *
+ * [S]: Set Pending, [C]: Clear Pending
+ * [U]: Unmask, [M]: Mask
+ * [A]: Ack
+ */
+
+static struct rvic_irq *rvic_get_irq(struct rvic *rvic, unsigned int intid)
+{
+       if (intid >= rvic->nr_total)
+               return NULL;
+       return &rvic->irqs[intid];
+}
+
+static bool rvic_irq_queued(struct rvic_irq *irq)
+{
+       return !list_empty(&irq->delivery_entry);
+}
+
+/* RVIC primitives. They all imply that the RVIC lock is held */
+static void __rvic_enable(struct rvic *rvic)
+{
+       rvic->enabled = true;
+}
+
+static void __rvic_disable(struct rvic *rvic)
+{
+       rvic->enabled = false;
+}
+
+static bool __rvic_is_enabled(struct rvic *rvic)
+{
+       return rvic->enabled;
+}
+
+static void __rvic_set_pending(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       if (!__rvic_is_enabled(rvic)) {
+               pr_debug("dropping intid %u\n", intid);
+               return;
+       }
+
+       spin_lock_irqsave(&irq->lock, flags);
+
+       irq->pending = true;
+       if (!irq->masked && !rvic_irq_queued(irq))
+               list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+       spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_pending(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq->lock, flags);
+
+       irq->pending = false;
+       list_del_init(&irq->delivery_entry);
+
+       spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static bool __rvic_is_pending(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+       bool pend;
+
+       spin_lock_irqsave(&irq->lock, flags);
+       pend = irq->pending;
+       spin_unlock_irqrestore(&irq->lock, flags);
+
+       return pend;
+}
+
+static void __rvic_set_masked(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq->lock, flags);
+
+       irq->masked = true;
+       if (irq->pending)
+               list_del_init(&irq->delivery_entry);
+
+       spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static void __rvic_clear_masked(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq->lock, flags);
+
+       irq->masked = false;
+       if (__rvic_is_enabled(rvic) && irq->pending && !rvic_irq_queued(irq))
+               list_add_tail(&irq->delivery_entry, &rvic->delivery);
+
+       spin_unlock_irqrestore(&irq->lock, flags);
+}
+
+static unsigned int __rvic_ack(struct rvic *rvic)
+{
+       unsigned int intid = ~0U;
+       struct rvic_irq *irq;
+
+       if (!__rvic_is_enabled(rvic))
+               return intid;
+
+       irq = list_first_entry_or_null(&rvic->delivery, struct rvic_irq,
+                                      delivery_entry);
+       if (irq) {
+               intid = irq->intid;
+               __rvic_set_masked(rvic, intid);
+               __rvic_clear_pending(rvic, intid);
+       }
+
+       return intid;
+}
+
+static bool __rvic_can_signal(struct rvic *rvic)
+{
+       return __rvic_is_enabled(rvic) && !list_empty(&rvic->delivery);
+}
+
+static void __rvic_resample(struct rvic *rvic, unsigned int intid)
+{
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+       bool pending;
+
+       spin_lock_irqsave(&irq->lock, flags);
+       if (irq->get_line_level) {
+               pending = irq->get_line_level(irq->intid);
+
+               /*
+                * As part of the resampling, tickle the GIC so that
+                * new interrupts can trickle in.
+                */
+               if (!pending && irq->host_irq)
+                       irq_set_irqchip_state(irq->host_irq,
+                                             IRQCHIP_STATE_ACTIVE, false);
+       } else {
+               pending = irq->line_level;
+       }
+
+       spin_unlock_irqrestore(&irq->lock, flags);
+
+       if (pending)
+               __rvic_set_pending(rvic, intid);
+}
+
+/*
+ * rVIC hypercall handling. All functions assume they are being called
+ * from the vcpu thread that triggers the hypercall.
+ */
+static void __rvic_kick_vcpu(struct kvm_vcpu *vcpu)
+{
+       kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
+       kvm_vcpu_kick(vcpu);
+}
+
+static void __rvic_sync_hcr(struct kvm_vcpu *vcpu, struct rvic *rvic,
+                           bool was_signaling)
+{
+       struct kvm_vcpu *target = kvm_rvic_to_vcpu(rvic);
+       bool signal = __rvic_can_signal(rvic);
+
+       /* We're hitting our own rVIC: update HCR_VI locally */
+       if (vcpu == target) {
+               if (signal)
+                       *vcpu_hcr(vcpu) |= HCR_VI;
+               else
+                       *vcpu_hcr(vcpu) &= ~HCR_VI;
+
+               return;
+       }
+
+       /*
+        * Remote rVIC case:
+        *
+        * We kick even if the interrupt disappears, as ISR_EL1.I must
+        * always reflect the state of the rVIC. This forces a reload
+        * of the vcpu state, making it consistent.
+        *
+        * This avoids modifying the target's own copy of HCR_EL2, as
+        * we are in a cross-vcpu call, and changing it from under its
+        * feet is dodgy.
+        */
+       if (was_signaling != signal)
+               __rvic_kick_vcpu(target);
+}
+
+static void rvic_version(struct kvm_vcpu *vcpu)
+{
+       /* ALP0.3 is the name of the game */
+       smccc_set_retval(vcpu, RVIC_STATUS_SUCCESS, RVIC_VERSION(0, 3), 0, 0);
+}
+
+static void rvic_info(struct kvm_vcpu *vcpu)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long what = smccc_get_arg1(vcpu);
+       unsigned long a0, a1;
+
+       switch (what) {
+       case RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS:
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+               a1 = rvic->nr_trusted;
+               break;
+       case RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS:
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+               a1 = rvic_nr_untrusted(rvic);
+               break;
+       default:
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+               a1 = 0;
+               break;
+       }
+
+       smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_enable(struct kvm_vcpu *vcpu)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long flags;
+       bool was_signaling;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       was_signaling = __rvic_can_signal(rvic);
+       __rvic_enable(rvic);
+       __rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+                        0, 0, 0);
+}
+
+static void rvic_disable(struct kvm_vcpu *vcpu)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long flags;
+       bool was_signaling;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       was_signaling = __rvic_can_signal(rvic);
+       __rvic_disable(rvic);
+       __rvic_sync_hcr(vcpu, rvic, was_signaling);
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+                        0, 0, 0);
+}
+
+typedef void (*rvic_action_fn_t)(struct rvic *, unsigned int);
+
+static int validate_rvic_call(struct kvm_vcpu *vcpu, struct rvic **rvicp,
+                             unsigned int *intidp)
+{
+       unsigned long mpidr = smccc_get_arg1(vcpu);
+       unsigned int intid = smccc_get_arg2(vcpu);
+       struct kvm_vcpu *target;
+       struct rvic *rvic;
+
+       /* FIXME: The spec distinguishes between invalid MPIDR and invalid CPU */
+
+       target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+       if (!target) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_INVALID_CPU, 0),
+                                0, 0, 0);
+               return -1;
+       }
+
+       rvic = kvm_vcpu_to_rvic(target);
+       if (intid >= rvic->nr_total) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 1),
+                                0, 0, 0);
+               return -1;
+       }
+
+       *rvicp = rvic;
+       *intidp = intid;
+
+       return 0;
+}
+
+static void __rvic_action(struct kvm_vcpu *vcpu, rvic_action_fn_t action,
+                         bool check_enabled)
+{
+       struct rvic *rvic;
+       unsigned long a0;
+       unsigned long flags;
+       unsigned int intid;
+
+       if (validate_rvic_call(vcpu, &rvic, &intid))
+               return;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       if (unlikely(check_enabled && !__rvic_is_enabled(rvic))) {
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+       } else {
+               bool was_signaling = __rvic_can_signal(rvic);
+               action(rvic, intid);
+               __rvic_sync_hcr(vcpu, rvic, was_signaling);
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+       }
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+static void rvic_set_masked(struct kvm_vcpu *vcpu)
+{
+       __rvic_action(vcpu, __rvic_set_masked, false);
+}
+
+static void rvic_clear_masked(struct kvm_vcpu *vcpu)
+{
+       __rvic_action(vcpu, __rvic_clear_masked, false);
+}
+
+static void rvic_clear_pending(struct kvm_vcpu *vcpu)
+{
+       __rvic_action(vcpu, __rvic_clear_pending, false);
+}
+
+static void rvic_signal(struct kvm_vcpu *vcpu)
+{
+       __rvic_action(vcpu, __rvic_set_pending, true);
+}
+
+static void rvic_is_pending(struct kvm_vcpu *vcpu)
+{
+       unsigned long flags;
+       struct rvic *rvic;
+       unsigned int intid;
+       bool res;
+
+       if (validate_rvic_call(vcpu, &rvic, &intid))
+               return;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       res = __rvic_is_pending(rvic, intid);
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0),
+                        res, 0, 0);
+}
+
+/*
+ * Ack and Resample are the only "interesting" operations that are
+ * strictly per-CPU.
+ */
+static void rvic_acknowledge(struct kvm_vcpu *vcpu)
+{
+       unsigned long a0, a1;
+       unsigned long flags;
+       unsigned int intid;
+       struct rvic *rvic;
+
+       rvic = kvm_vcpu_to_rvic(vcpu);
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       if (unlikely(!__rvic_is_enabled(rvic))) {
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_DISABLED, 0);
+               a1 = 0;
+       } else {
+               intid = __rvic_ack(rvic);
+               __rvic_sync_hcr(vcpu, rvic, true);
+               if (unlikely(intid >= rvic->nr_total)) {
+                       a0 = RVIx_STATUS_PACK(RVIC_STATUS_NO_INTERRUPTS, 0);
+                       a1 = 0;
+               } else {
+                       a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+                       a1 = intid;
+               }
+       }
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, a0, a1, 0, 0);
+}
+
+static void rvic_resample(struct kvm_vcpu *vcpu)
+{
+       unsigned int intid = smccc_get_arg1(vcpu);
+       unsigned long flags;
+       unsigned long a0;
+       struct rvic *rvic;
+
+       rvic = kvm_vcpu_to_rvic(vcpu);
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       if (unlikely(intid >= rvic->nr_trusted)) {
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_ERROR_PARAMETER, 0);
+       } else {
+               __rvic_resample(rvic, intid);
+
+               /*
+                * Don't bother finding out if we were signalling, we
+                * will update HCR_EL2 anyway as we are guaranteed not
+                * to be in a cross-call.
+                */
+               __rvic_sync_hcr(vcpu, rvic, true);
+               a0 = RVIx_STATUS_PACK(RVIC_STATUS_SUCCESS, 0);
+       }
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       smccc_set_retval(vcpu, a0, 0, 0, 0);
+}
+
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu)
+{
+       pr_debug("RVIC: HC %08x\n", (unsigned int)smccc_get_function(vcpu));
+       switch (smccc_get_function(vcpu)) {
+       case SMC64_RVIC_VERSION:
+               rvic_version(vcpu);
+               break;
+       case SMC64_RVIC_INFO:
+               rvic_info(vcpu);
+               break;
+       case SMC64_RVIC_ENABLE:
+               rvic_enable(vcpu);
+               break;
+       case SMC64_RVIC_DISABLE:
+               rvic_disable(vcpu);
+               break;
+       case SMC64_RVIC_SET_MASKED:
+               rvic_set_masked(vcpu);
+               break;
+       case SMC64_RVIC_CLEAR_MASKED:
+               rvic_clear_masked(vcpu);
+               break;
+       case SMC64_RVIC_IS_PENDING:
+               rvic_is_pending(vcpu);
+               break;
+       case SMC64_RVIC_SIGNAL:
+               rvic_signal(vcpu);
+               break;
+       case SMC64_RVIC_CLEAR_PENDING:
+               rvic_clear_pending(vcpu);
+               break;
+       case SMC64_RVIC_ACKNOWLEDGE:
+               rvic_acknowledge(vcpu);
+               break;
+       case SMC64_RVIC_RESAMPLE:
+               rvic_resample(vcpu);
+               break;
+       default:
+               smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+               break;
+       }
+
+       return 1;
+}
+
+static void rvid_version(struct kvm_vcpu *vcpu)
+{
+       /* ALP0.3 is the name of the game */
+       smccc_set_retval(vcpu, RVID_STATUS_SUCCESS, RVID_VERSION(0, 3), 0, 0);
+}
+
+static void rvid_map(struct kvm_vcpu *vcpu)
+{
+       unsigned long input = smccc_get_arg1(vcpu);
+       unsigned long mpidr = smccc_get_arg2(vcpu);
+       unsigned int intid = smccc_get_arg3(vcpu);
+       unsigned long flags;
+       struct rvic_vm_data *data;
+       struct kvm_vcpu *target;
+
+       data = vcpu->kvm->arch.irqchip_data;
+
+       if (input >= rvic_nr_untrusted(data)) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+                                0, 0, 0);
+               return;
+       }
+
+       /* FIXME: different error from RVIC. Why? */
+       target = kvm_mpidr_to_vcpu(vcpu->kvm, mpidr);
+       if (!target) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 1),
+                                0, 0, 0);
+               return;
+       }
+
+       if (intid < data->nr_trusted || intid >= data->nr_total) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 2),
+                                0, 0, 0);
+               return;
+       }
+
+       spin_lock_irqsave(&data->lock, flags);
+       data->rvid_map[input].target_vcpu       = target->vcpu_id;
+       data->rvid_map[input].intid             = intid;
+       spin_unlock_irqrestore(&data->lock, flags);
+
+       smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+static void rvid_unmap(struct kvm_vcpu *vcpu)
+{
+       unsigned long input = smccc_get_arg1(vcpu);
+       unsigned long flags;
+       struct rvic_vm_data *data;
+
+       data = vcpu->kvm->arch.irqchip_data;
+
+       if (input >= rvic_nr_untrusted(data)) {
+               smccc_set_retval(vcpu, RVIx_STATUS_PACK(RVID_STATUS_ERROR_PARAMETER, 0),
+                                0, 0, 0);
+               return;
+       }
+
+       spin_lock_irqsave(&data->lock, flags);
+       data->rvid_map[input].target_vcpu       = 0;
+       data->rvid_map[input].intid             = 0;
+       spin_unlock_irqrestore(&data->lock, flags);
+
+       smccc_set_retval(vcpu, 0, 0, 0, 0);
+}
+
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu)
+{
+       pr_debug("RVID: HC %08x\n", (unsigned int)smccc_get_function(vcpu));
+       switch (smccc_get_function(vcpu)) {
+       case SMC64_RVID_VERSION:
+               rvid_version(vcpu);
+               break;
+       case SMC64_RVID_MAP:
+               rvid_map(vcpu);
+               break;
+       case SMC64_RVID_UNMAP:
+               rvid_unmap(vcpu);
+               break;
+       default:
+               smccc_set_retval(vcpu, SMCCC_RET_NOT_SUPPORTED, 0, 0, 0);
+               break;
+       }
+
+       return 1;
+}
+
+/*
+ * KVM internal interface to the rVIC
+ */
+
+/* This *must* be called from the vcpu thread */
+static void rvic_flush_signaling_state(struct kvm_vcpu *vcpu)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long flags;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       __rvic_sync_hcr(vcpu, rvic, true);
+
+       spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
+/* This can be called from any context */
+static void rvic_vcpu_inject_irq(struct kvm_vcpu *vcpu, unsigned int intid,
+                                bool level)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long flags;
+       bool prev;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+
+       if (WARN_ON(intid >= rvic->nr_total))
+               goto out;
+
+       /*
+        * Although really ugly, this should be safe as we hold the
+        * rvic lock, and the only path that uses this information is
+        * resample, which takes this lock too.
+        */
+       if (!rvic->irqs[intid].get_line_level)
+               rvic->irqs[intid].line_level = level;
+
+       if (level) {
+               prev = __rvic_can_signal(rvic);
+               __rvic_set_pending(rvic, intid);
+               if (prev != __rvic_can_signal(rvic))
+                       __rvic_kick_vcpu(vcpu);
+       }
+out:
+       spin_unlock_irqrestore(&rvic->lock, flags);
+}
+
+static int rvic_inject_irq(struct kvm *kvm, unsigned int cpu,
+                          unsigned int intid, bool level, void *owner)
+{
+       struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, cpu);
+       struct rvic *rvic;
+
+       if (unlikely(!vcpu))
+               return -EINVAL;
+
+       rvic = kvm_vcpu_to_rvic(vcpu);
+       if (unlikely(intid >= rvic->nr_total))
+               return -EINVAL;
+
+       /* Ignore interrupt owner for now */
+       rvic_vcpu_inject_irq(vcpu, intid, level);
+       return 0;
+}
+
+static int rvic_inject_userspace_irq(struct kvm *kvm, unsigned int type,
+                                    unsigned int cpu,
+                                    unsigned int intid, bool level)
+{
+       struct rvic_vm_data *data = kvm->arch.irqchip_data;
+       unsigned long flags;
+       u16 output;
+
+       switch (type) {
+       case KVM_ARM_IRQ_TYPE_SPI:
+               /*
+                * Userspace can only inject interrupts that are
+                * translated by the rvid, so the cpu parameter is
+                * irrelevant and we override it when resolving the
+                * translation.
+                */
+               if (intid >= rvic_nr_untrusted(data))
+                       return -EINVAL;
+
+               spin_lock_irqsave(&data->lock, flags);
+               output = data->rvid_map[intid].intid;
+               cpu = data->rvid_map[intid].target_vcpu;
+               spin_unlock_irqrestore(&data->lock, flags);
+
+               /* Silently ignore unmapped interrupts */
+               if (output < data->nr_trusted)
+                       return 0;
+
+               return rvic_inject_irq(kvm, cpu, output, level, NULL);
+       default:
+               return -EINVAL;
+       }
+}
+
+static int rvic_vcpu_init(struct kvm_vcpu *vcpu)
+{
+       struct rvic_vm_data *data = vcpu->kvm->arch.irqchip_data;
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       int i;
+
+       /* irqchip not ready yet, we will come back later */
+       if (!data)
+               return 0;
+
+       if (WARN_ON(rvic->irqs))
+               return -EINVAL;
+
+       spin_lock_init(&rvic->lock);
+       INIT_LIST_HEAD(&rvic->delivery);
+       rvic->nr_trusted        = data->nr_trusted;
+       rvic->nr_total          = data->nr_total;
+       rvic->enabled           = false;
+
+       rvic->irqs = kcalloc(rvic->nr_total, sizeof(*rvic->irqs), GFP_ATOMIC);
+       if (!rvic->irqs)
+               return -ENOMEM;
+
+       for (i = 0; i < rvic->nr_total; i++) {
+               struct rvic_irq *irq = &rvic->irqs[i];
+
+               spin_lock_init(&irq->lock);
+               INIT_LIST_HEAD(&irq->delivery_entry);
+               irq->get_line_level     = NULL;
+               irq->intid              = i;
+               irq->host_irq           = 0;
+               irq->pending            = false;
+               irq->masked             = true;
+               irq->line_level         = false;
+       }
+
+       return 0;
+}
+
+static void rvic_destroy(struct kvm *kvm)
+{
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       mutex_lock(&kvm->lock);
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+
+               INIT_LIST_HEAD(&rvic->delivery);
+               kfree(rvic->irqs);
+               rvic->irqs = NULL;
+       }
+
+       mutex_unlock(&kvm->lock);
+}
+
+static int rvic_pending_irq(struct kvm_vcpu *vcpu)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       unsigned long flags;
+       bool res;
+
+       spin_lock_irqsave(&rvic->lock, flags);
+       res = __rvic_can_signal(rvic);
+       spin_unlock_irqrestore(&rvic->lock, flags);
+
+       return res;
+}
+
+static int rvic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+                            u32 intid, bool (*get_line_level)(int))
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq->lock, flags);
+       irq->host_irq = host_irq;
+       irq->get_line_level = get_line_level;
+       spin_unlock_irqrestore(&irq->lock, flags);
+
+       return 0;
+}
+
+static int rvic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int intid)
+{
+       struct rvic *rvic = kvm_vcpu_to_rvic(vcpu);
+       struct rvic_irq *irq = rvic_get_irq(rvic, intid);
+       unsigned long flags;
+
+       spin_lock_irqsave(&irq->lock, flags);
+       irq->host_irq = 0;
+       irq->get_line_level = NULL;
+       spin_unlock_irqrestore(&irq->lock, flags);
+
+       return 0;
+}
+
+static int rvic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e,
+                             struct kvm *kvm, int irq_source_id,
+                             int level, bool line_status)
+{
+       /* Abuse the userspace interface to perform the routing */
+       return rvic_inject_userspace_irq(kvm, KVM_ARM_IRQ_TYPE_SPI, 0,
+                                        e->irqchip.pin, level);
+}
+
+static int rvic_set_msi(struct kvm_kernel_irq_routing_entry *e,
+                       struct kvm *kvm, int irq_source_id,
+                       int level, bool line_status)
+{
+       return -ENODEV;
+}
+
+static int rvic_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e,
+                                struct kvm *kvm, int irq_source_id,
+                                int level, bool line_status)
+{
+       if (e->type != KVM_IRQ_ROUTING_IRQCHIP)
+               return -EWOULDBLOCK;
+
+       return rvic_irqfd_set_irq(e, kvm, irq_source_id, level, line_status);
+}
+
+static const struct kvm_irqchip_flow rvic_irqchip_flow = {
+       .irqchip_destroy                = rvic_destroy,
+       .irqchip_vcpu_init              = rvic_vcpu_init,
+       /* Nothing to do on block/unblock */
+       /* Nothing to do on load/put */
+       .irqchip_vcpu_pending_irq       = rvic_pending_irq,
+       .irqchip_vcpu_flush_hwstate     = rvic_flush_signaling_state,
+       /* Nothing to do on sync_hwstate */
+       .irqchip_inject_irq             = rvic_inject_irq,
+       .irqchip_inject_userspace_irq   = rvic_inject_userspace_irq,
+       /* No reset_mapped_irq as we allow spurious interrupts */
+       .irqchip_map_phys_irq           = rvic_map_phys_irq,
+       .irqchip_unmap_phys_irq         = rvic_unmap_phys_irq,
+       .irqchip_irqfd_set_irq          = rvic_irqfd_set_irq,
+       .irqchip_set_msi                = rvic_set_msi,
+       .irqchip_set_irq_inatomic       = rvic_set_irq_inatomic,
+};
+
+/*
+ * Install the default 1:1 GSI -> (irqchip 0, pin) routing table
+ * covering every untrusted interrupt.
+ */
+static int rvic_setup_default_irq_routing(struct kvm *kvm)
+{
+       struct rvic_vm_data *data = kvm->arch.irqchip_data;
+       unsigned int nr = rvic_nr_untrusted(data);
+       struct kvm_irq_routing_entry *routing;
+       int idx, ret;
+
+       routing = kcalloc(nr, sizeof(*routing), GFP_KERNEL);
+       if (!routing)
+               return -ENOMEM;
+
+       for (idx = 0; idx < nr; idx++) {
+               routing[idx] = (struct kvm_irq_routing_entry) {
+                       .gsi                    = idx,
+                       .type                   = KVM_IRQ_ROUTING_IRQCHIP,
+                       .u.irqchip.irqchip      = 0,
+                       .u.irqchip.pin          = idx,
+               };
+       }
+
+       ret = kvm_set_irq_routing(kvm, routing, nr, 0);
+       kfree(routing);
+
+       return ret;
+}
+
+/* Device management */
+/*
+ * KVM_CREATE_DEVICE handler: claim the VM's irqchip slot for the
+ * rVIC. Fails if another irqchip already exists or if any vcpu has
+ * already run. The per-VM data is allocated later, when userspace
+ * sets the NR_IRQS attribute.
+ */
+static int rvic_device_create(struct kvm_device *dev, u32 type)
+{
+       struct kvm *kvm = dev->kvm;
+       struct kvm_vcpu *vcpu;
+       int i, ret;
+
+       /* Only one in-kernel irqchip per VM */
+       if (irqchip_in_kernel(kvm))
+               return -EEXIST;
+
+       ret = -EBUSY;
+       if (!lock_all_vcpus(kvm))
+               return ret;
+
+       /* Refuse to switch irqchip type once a vcpu has executed */
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (vcpu->arch.has_run_once)
+                       goto out_unlock;
+       }
+
+       ret = 0;
+
+       /*
+        * The good thing about not having any HW is that you don't
+        * get the limitations of the HW...
+        */
+       kvm->arch.max_vcpus             = KVM_MAX_VCPUS;
+       kvm->arch.irqchip_type          = IRQCHIP_RVIC;
+       kvm->arch.irqchip_flow          = rvic_irqchip_flow;
+       kvm->arch.irqchip_data          = NULL; /* set via KVM_DEV_ARM_RVIC_GRP_NR_IRQS */
+
+out_unlock:
+       unlock_all_vcpus(kvm);
+       return ret;
+}
+
+/*
+ * Tear down the rVIC device. Frees the VM-wide rVIC state (if the
+ * NR_IRQS attribute was ever set) and the device itself. Clear the
+ * irqchip_data pointer so nothing can dereference the freed state
+ * through kvm->arch afterwards.
+ */
+static void rvic_device_destroy(struct kvm_device *dev)
+{
+       kfree(dev->kvm->arch.irqchip_data);
+       dev->kvm->arch.irqchip_data = NULL;
+       kfree(dev);
+}
+
+/*
+ * Set device attributes under the VM lock:
+ *
+ * - KVM_DEV_ARM_RVIC_GRP_NR_IRQS: one-shot configuration of the
+ *   trusted/total interrupt counts, allocating the VM-wide state.
+ * - KVM_DEV_ARM_RVIC_GRP_INIT: one-shot finalization, initializing
+ *   the rvic on already created vcpus and installing the default
+ *   irq routing.
+ *
+ * Returns 0 on success, -ENXIO for unknown group/attr, -EBUSY on
+ * repeated configuration, -EFAULT/-EINVAL/-ENOMEM as appropriate.
+ */
+static int rvic_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+       struct rvic_vm_data *data;
+       struct kvm_vcpu *vcpu;
+       u32 __user *uaddr, val;
+       u16 trusted, total;
+       int i, ret = -ENXIO;
+
+       mutex_lock(&dev->kvm->lock);
+
+       switch (attr->group) {
+       case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+               if (attr->attr)
+                       break;
+
+               /* The interrupt counts can only be set once */
+               if (dev->kvm->arch.irqchip_data) {
+                       ret = -EBUSY;
+                       break;
+               }
+
+               uaddr = (u32 __user *)(uintptr_t)attr->addr;
+               if (get_user(val, uaddr)) {
+                       ret = -EFAULT;
+                       break;
+               }
+
+               trusted = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK, val);
+               total   = FIELD_GET(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK, val);
+               /*
+                * Both counts must be multiples of 32, with at least
+                * 32 trusted and 32 untrusted interrupts, and at most
+                * 2048 interrupts overall.
+                */
+               if (total < trusted || trusted < 32 || total < 64 ||
+                   trusted % 32 || total % 32 || total > 2048) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               data = kzalloc(struct_size(data, rvid_map, (total - trusted)),
+                              GFP_KERNEL);
+               if (!data) {
+                       ret = -ENOMEM;
+                       break;
+               }
+
+               data->nr_trusted = trusted;
+               data->nr_total = total;
+               spin_lock_init(&data->lock);
+               /* Default to no mapping */
+               for (i = 0; i < (total - trusted); i++) {
+                       /*
+                        * an intid < nr_trusted is invalid as the
+                        * result of a translation through the rvid,
+                        * hence the input is unmapped.
+                        */
+                       data->rvid_map[i].target_vcpu = 0;
+                       data->rvid_map[i].intid = 0;
+               }
+
+               dev->kvm->arch.irqchip_data = data;
+
+               ret = 0;
+               break;
+
+       case KVM_DEV_ARM_RVIC_GRP_INIT:
+               if (attr->attr)
+                       break;
+
+               /* NR_IRQS must have been configured first */
+               if (!dev->kvm->arch.irqchip_data)
+                       break;
+
+               /* Only a single initialization is allowed */
+               if (dev->kvm->arch.irqchip_finalized) {
+                       ret = -EBUSY;
+                       break;
+               }
+
+               ret = 0;
+
+               /* Init the rvic on any already created vcpu */
+               kvm_for_each_vcpu(i, vcpu, dev->kvm) {
+                       ret = rvic_vcpu_init(vcpu);
+                       if (ret)
+                               break;
+               }
+
+               if (!ret)
+                       ret = rvic_setup_default_irq_routing(dev->kvm);
+               if (!ret)
+                       dev->kvm->arch.irqchip_finalized = true;
+               break;
+
+       default:
+               break;
+       }
+
+       mutex_unlock(&dev->kvm->lock);
+
+       return ret;
+}
+
+/*
+ * Read back device attributes. Only the NR_IRQS group is readable,
+ * and only once the interrupt counts have been configured.
+ */
+static int rvic_get_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+       struct rvic_vm_data *data;
+       u32 __user *uaddr;
+       int ret = -ENXIO;
+       u32 reg;
+
+       mutex_lock(&dev->kvm->lock);
+
+       switch (attr->group) {
+       case KVM_DEV_ARM_RVIC_GRP_NR_IRQS:
+               if (attr->attr)
+                       break;
+
+               data = dev->kvm->arch.irqchip_data;
+               if (!data)
+                       break;
+
+               reg = (FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TRUSTED_MASK,
+                                 data->nr_trusted) |
+                      FIELD_PREP(KVM_DEV_ARM_RVIC_GRP_NR_TOTAL_MASK,
+                                 data->nr_total));
+
+               uaddr = (u32 __user *)(uintptr_t)attr->addr;
+               ret = put_user(reg, uaddr);
+               break;
+
+       default:
+               break;
+       }
+
+       mutex_unlock(&dev->kvm->lock);
+
+       return ret;
+}
+
+/*
+ * Advertise the supported attributes: the NR_IRQS and INIT groups,
+ * each with a zero attr index only.
+ */
+static int rvic_has_attr(struct kvm_device *dev, struct kvm_device_attr *attr)
+{
+       if (attr->group != KVM_DEV_ARM_RVIC_GRP_NR_IRQS &&
+           attr->group != KVM_DEV_ARM_RVIC_GRP_INIT)
+               return -ENXIO;
+
+       return attr->attr ? -ENXIO : 0;
+}
+
+/* The rVIC KVM device, instantiated by userspace via KVM_CREATE_DEVICE */
+static const struct kvm_device_ops rvic_dev_ops = {
+       .name           = "kvm-arm-rvic",
+       .create         = rvic_device_create,
+       .destroy        = rvic_device_destroy,
+       .set_attr       = rvic_set_attr,
+       .get_attr       = rvic_get_attr,
+       .has_attr       = rvic_has_attr,
+};
+
+/*
+ * Register the rVIC device ops with KVM so userspace can create the
+ * device with KVM_CREATE_DEVICE(KVM_DEV_TYPE_ARM_RVIC).
+ */
+int kvm_register_rvic_device(void)
+{
+       return kvm_register_device_ops(&rvic_dev_ops, KVM_DEV_TYPE_ARM_RVIC);
+}
diff --git a/include/kvm/arm_rvic.h b/include/kvm/arm_rvic.h
new file mode 100644
index 000000000000..9e67a83fa384
--- /dev/null
+++ b/include/kvm/arm_rvic.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * rVIC/rVID PV interrupt controller implementation for KVM/arm64.
+ *
+ * Copyright 2020 Google LLC.
+ * Author: Marc Zyngier <m...@kernel.org>
+ */
+
+#ifndef __KVM_ARM_RVIC_H__
+#define __KVM_ARM_RVIC_H__
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+struct kvm_vcpu;
+
+/* Per-interrupt state, protected by @lock */
+struct rvic_irq {
+       spinlock_t              lock;
+       /* Links this irq on an rvic's delivery list */
+       struct list_head        delivery_entry;
+       /* Optional level resampling callback; NULL means use @line_level */
+       bool                    (*get_line_level)(int intid);
+       unsigned int            intid;
+       /* Backing physical interrupt, if mapped - 0 otherwise (TODO confirm) */
+       unsigned int            host_irq;
+       bool                    pending;
+       bool                    masked;
+       bool                    line_level; /* If get_line_level == NULL */
+};
+
+/* Per-vcpu interrupt controller state (embedded in kvm_vcpu_arch) */
+struct rvic {
+       spinlock_t              lock;
+       /* rvic_irqs queued for delivery to this vcpu */
+       struct list_head        delivery;
+       /* Interrupt array; presumably nr_total entries - verify at alloc site */
+       struct rvic_irq         *irqs;
+       unsigned int            nr_trusted;
+       unsigned int            nr_total;
+       bool                    enabled;
+};
+
+/* Hypercall handlers and device registration entry points */
+int kvm_rvic_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_rvid_handle_hcall(struct kvm_vcpu *vcpu);
+int kvm_register_rvic_device(void);
+
+#endif
diff --git a/include/linux/irqchip/irq-rvic.h b/include/linux/irqchip/irq-rvic.h
index 4545c1e89741..b188773729fb 100644
--- a/include/linux/irqchip/irq-rvic.h
+++ b/include/linux/irqchip/irq-rvic.h
@@ -57,6 +57,8 @@
 #define SMC64_RVIC_ACKNOWLEDGE         SMC64_RVIC_FN(9)
 #define SMC64_RVIC_RESAMPLE            SMC64_RVIC_FN(10)
 
+#define SMC64_RVIC_LAST                        SMC64_RVIC_RESAMPLE
+
 #define RVIC_INFO_KEY_NR_TRUSTED_INTERRUPTS    0
 #define RVIC_INFO_KEY_NR_UNTRUSTED_INTERRUPTS  1
 
@@ -82,6 +84,8 @@
 #define SMC64_RVID_MAP                 SMC64_RVID_FN(1)
 #define SMC64_RVID_UNMAP               SMC64_RVID_FN(2)
 
+#define SMC64_RVID_LAST                        SMC64_RVID_UNMAP
+
 #define RVID_VERSION(M, m)             RVIx_VERSION((M), (m))
 
 #define RVID_VERSION_MAJOR(v)          RVIx_VERSION_MAJOR((v))
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index f6d86033c4fa..6d245d2dc9e6 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -1264,6 +1264,8 @@ enum kvm_device_type {
 #define KVM_DEV_TYPE_XIVE              KVM_DEV_TYPE_XIVE
        KVM_DEV_TYPE_ARM_PV_TIME,
 #define KVM_DEV_TYPE_ARM_PV_TIME       KVM_DEV_TYPE_ARM_PV_TIME
+       KVM_DEV_TYPE_ARM_RVIC,
+#define KVM_DEV_TYPE_ARM_RVIC          KVM_DEV_TYPE_ARM_RVIC
        KVM_DEV_TYPE_MAX,
 };
 
-- 
2.27.0

_______________________________________________
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm

Reply via email to