From: Benjamin Herrenschmidt <b...@kernel.crashing.org>

This adds an implementation of the XICS hypercalls in real mode for HV
KVM, which allows us to avoid exiting the guest MMU context on all
threads for a variety of operations such as fetching a pending
interrupt, EOI of messages, IPIs, etc.

For debugging purposes, the use of the real mode implementation can be
disabled by setting the KVM_ICP_FLAG_NOREALMODE bit in the icp.flags
field of struct kvm_irqchip_args.

Signed-off-by: Benjamin Herrenschmidt <b...@kernel.crashing.org>
Signed-off-by: Paul Mackerras <pau...@samba.org>
---
 arch/powerpc/include/uapi/asm/kvm.h  |    1 +
 arch/powerpc/kvm/Makefile            |    1 +
 arch/powerpc/kvm/book3s_hv_rm_xics.c |  402 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/book3s_xics.c       |  153 +++++--------
 arch/powerpc/kvm/book3s_xics.h       |  116 ++++++++++
 5 files changed, 572 insertions(+), 101 deletions(-)
 create mode 100644 arch/powerpc/kvm/book3s_hv_rm_xics.c
 create mode 100644 arch/powerpc/kvm/book3s_xics.h

diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h
index 145c645..55c1907 100644
--- a/arch/powerpc/include/uapi/asm/kvm.h
+++ b/arch/powerpc/include/uapi/asm/kvm.h
@@ -314,6 +314,7 @@ struct kvm_irqchip_args {
                 * structures.
                 */
                struct {
+#define KVM_ICP_FLAG_NOREALMODE                0x00000001 /* Disable real mode ICP */
                        __u32 flags;
                } icp;
 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile
index ec2f8da..c3d958d 100644
--- a/arch/powerpc/kvm/Makefile
+++ b/arch/powerpc/kvm/Makefile
@@ -72,6 +72,7 @@ kvm-book3s_64-builtin-objs-$(CONFIG_KVM_BOOK3S_64_HV) := \
        book3s_hv_rmhandlers.o \
        book3s_hv_rm_mmu.o \
        book3s_64_vio_hv.o \
+       book3s_hv_rm_xics.o \
        book3s_hv_builtin.o
 
 kvm-book3s_64-module-objs := \
diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c
new file mode 100644
index 0000000..49bb25b
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kernel.h>
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/kvm_book3s.h>
+#include <asm/kvm_ppc.h>
+#include <asm/hvcall.h>
+#include <asm/xics.h>
+#include <asm/debug.h>
+#include <asm/synch.h>
+#include <asm/ppc-opcode.h>
+
+#include "book3s_xics.h"
+
+#define DEBUG_PASSUP
+
+/*
+ * Store the byte @val at physical address @paddr: a "sync" followed by
+ * a "stbcix" store. Used by the real-mode code to poke an ICP register.
+ */
+static inline void rm_writeb(unsigned long paddr, u8 val)
+{
+       __asm__ __volatile__(
+               "sync; stbcix %0,0,%1"
+               : /* no outputs */
+               : "r" (val), "r" (paddr)
+               : "memory");
+}
+
+/*
+ * Raise the external interrupt on @vcpu from real mode.
+ *
+ * @vcpu:      VCPU that must take the interrupt
+ * @this_vcpu: VCPU executing the hypercall
+ *
+ * Three cases:
+ *  - target is ourselves: set LPCR[MER] and we are done;
+ *  - target has a valid host cpu number: write IPI_PRIORITY to the MFRR
+ *    of that thread's ICP to poke it;
+ *  - otherwise: record a XICS_RM_KICK_VCPU request in our own ICP so
+ *    that virtual mode performs the kick.
+ */
+static void icp_rm_set_vcpu_irq(struct kvm_vcpu *vcpu,
+                               struct kvm_vcpu *this_vcpu)
+{
+       struct kvmppc_icp *this_icp = this_vcpu->arch.icp;
+       unsigned long icp_base;
+       int target_cpu;
+
+       /* Flag the external interrupt as pending on the target */
+       vcpu->stat.queue_intr++;
+       set_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
+
+       if (vcpu == this_vcpu) {
+               /* Kicking ourselves only needs MER set */
+               mtspr(SPRN_LPCR, mfspr(SPRN_LPCR) | LPCR_MER);
+               return;
+       }
+
+       target_cpu = vcpu->cpu;
+       if (target_cpu >= 0 && target_cpu < nr_cpu_ids) {
+               /* With SMT, vcpu->cpu names thread 0: add the thread id */
+               target_cpu += vcpu->arch.ptid;
+
+               /* Poke the target through its ICP's MFRR */
+               icp_base = paca[target_cpu].kvm_hstate.xics_phys;
+               rm_writeb(icp_base + XICS_MFRR, IPI_PRIORITY);
+               return;
+       }
+
+       /* No usable cpu number: too hard, let virtual mode do the kick */
+       this_icp->rm_action |= XICS_RM_KICK_VCPU;
+       this_icp->rm_kick_target = vcpu;
+}
+
+/*
+ * Drop the pending external interrupt of @vcpu: clear the pending
+ * exception bit and LPCR[MER]. Must only be called by a VCPU on itself,
+ * since it rewrites the LPCR of the executing thread.
+ */
+static void icp_rm_clr_vcpu_irq(struct kvm_vcpu *vcpu)
+{
+       unsigned long lpcr;
+
+       clear_bit(BOOK3S_IRQPRIO_EXTERNAL_LEVEL,
+                 &vcpu->arch.pending_exceptions);
+
+       lpcr = mfspr(SPRN_LPCR);
+       mtspr(SPRN_LPCR, lpcr & ~LPCR_MER);
+}
+
+/*
+ * Try to move @icp from state @old to state @new with one compare & swap.
+ *
+ * The out_ee bit of @new is recomputed here before the swap. Returns
+ * true if the swap took effect, false if the ICP state no longer matched
+ * @old (the caller is expected to reload the state and retry).
+ *
+ * On success the interrupt output is only ever *set* here, never
+ * cleared: another processor may be updating the same ICP, so clearing
+ * is left to the owning processor, which does it before starting its
+ * own update (H_XIRR and the Up_CPPR path of H_CPPR). The output is set
+ * unconditionally whenever the new state asks for it, because raising
+ * CPPR opportunistically removes the pending flag and it has to be put
+ * back if an interrupt is still pending.
+ */
+static inline bool icp_rm_try_update(struct kvmppc_icp *icp,
+                                    union kvmppc_icp_state old,
+                                    union kvmppc_icp_state new)
+{
+       struct kvm_vcpu *this_vcpu = local_paca->kvm_hstate.kvm_vcpu;
+
+       /* Derive the output line from the state we want to install */
+       new.out_ee = (new.xisr && (new.pending_pri < new.cppr));
+
+       /* Somebody changed the state under us: report failure */
+       if (cmpxchg64(&icp->state.raw, old.raw, new.raw) != old.raw)
+               return false;
+
+       if (new.out_ee)
+               icp_rm_set_vcpu_irq(icp->vcpu, this_vcpu);
+
+       /* Leave a trace of the last transition for debugging */
+       this_vcpu->arch.icp->rm_dbgstate = new;
+       this_vcpu->arch.icp->rm_dbgtgt = icp->vcpu;
+
+       return true;
+}
+
+/*
+ * Pick the hcall return value: H_TOO_HARD when the debug switch is on
+ * or when real mode left an action in @icp for virtual mode to finish,
+ * H_SUCCESS otherwise.
+ */
+static inline int check_too_hard(struct kvmppc_xics *xics,
+                                struct kvmppc_icp *icp)
+{
+       if (xics->real_mode_dbg || icp->rm_action)
+               return H_TOO_HARD;
+
+       return H_SUCCESS;
+}
+
+/*
+ * ICP states Down_CPPR and Resend, folded into one atomic update.
+ *
+ * @xics:     XICS instance (not used by this function)
+ * @icp:      ICP whose CPPR is being made less favored
+ * @new_cppr: CPPR value to load
+ *
+ * The CPPR is loaded, and if the MFRR is now more favored than the CPPR
+ * and at least as favored as whatever is pending, an IPI becomes the
+ * pending interrupt.
+ *
+ * This is not the full Check_IPI of the PAPR: the MFRR does not change
+ * here, so an MFRR that should have rejected a pending interrupt was
+ * already dealt with when the MFRR was written. Only resends have to be
+ * considered, never rejects.
+ *
+ * A latched need_resend is not acted upon in real mode; it is turned
+ * into a XICS_RM_CHECK_RESEND request, which makes the hcall return
+ * H_TOO_HARD so that virtual mode performs the resend.
+ */
+static void icp_rm_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
+                            u8 new_cppr)
+{
+       union kvmppc_icp_state cur, next;
+       bool want_resend;
+
+       for (;;) {
+               cur = next = ACCESS_ONCE(icp->state);
+
+               /* Down_CPPR proper */
+               next.cppr = new_cppr;
+
+               /*
+                * Reduced Resend / Check_IPI / IPI: a pending interrupt
+                * cannot be trumped by an IPI at this point, so whatever
+                * is pending is already an IPI, or more favored than the
+                * MFRR, or absent.
+                */
+               if (next.mfrr < new_cppr && next.mfrr <= next.pending_pri) {
+                       next.pending_pri = next.mfrr;
+                       next.xisr = XICS_IPI;
+               }
+
+               /* Remember the resend request and clear it in the state */
+               want_resend = next.need_resend;
+               next.need_resend = 0;
+
+               if (icp_rm_try_update(icp, cur, next))
+                       break;
+       }
+
+       /*
+        * Resend checks are asynchronous to the ICP state update in HW
+        * (bus transactions), so they can be handled separately.
+        */
+       if (want_resend)
+               icp->rm_action |= XICS_RM_CHECK_RESEND;
+}
+
+
+/*
+ * Real-mode H_XIRR: accept the pending interrupt of the calling VCPU.
+ *
+ * The XIRR value (CPPR in the top byte, XISR in the low 24 bits) is
+ * stored in GPR4. Returns H_TOO_HARD if real mode is disabled or if
+ * something was left for virtual mode to finish, H_SUCCESS otherwise.
+ */
+unsigned long kvmppc_rm_h_xirr(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+       struct kvmppc_icp *icp = vcpu->arch.icp;
+       union kvmppc_icp_state cur, next;
+       u32 xirr;
+
+       if (!xics->real_mode)
+               return H_TOO_HARD;
+
+       /* Clear our interrupt line first; the update re-sets it if needed */
+       icp_rm_clr_vcpu_irq(icp->vcpu);
+
+       /*
+        * ICP State: Accept_Interrupt
+        *
+        * Hand back the pending interrupt (if any) together with the
+        * current CPPR, then clear the XISR and load the CPPR with the
+        * priority of the interrupt just accepted.
+        */
+       for (;;) {
+               cur = next = ACCESS_ONCE(icp->state);
+
+               xirr = (((u32)cur.cppr) << 24) | cur.xisr;
+               if (cur.xisr == 0)
+                       break;          /* nothing pending, no update */
+
+               next.cppr = next.pending_pri;
+               next.pending_pri = 0xff;
+               next.xisr = 0;
+
+               if (icp_rm_try_update(icp, cur, next))
+                       break;
+       }
+
+       /* The hcall result goes back to the guest in GPR4 */
+       vcpu->arch.gpr[4] = xirr;
+
+       return check_too_hard(xics, icp);
+}
+
+/*
+ * Real-mode H_IPI: write @mfrr into the MFRR of the ICP of @server.
+ *
+ * @vcpu:   calling VCPU
+ * @server: server number (vcpu id) whose MFRR is written
+ * @mfrr:   new MFRR value
+ *
+ * Returns H_TOO_HARD if real mode is disabled or if work was left for
+ * virtual mode, H_PARAMETER if @server has no ICP, H_SUCCESS otherwise.
+ *
+ * Rejects and resends cannot be processed in real mode; they are
+ * recorded in the *caller's* ICP (rm_action / rm_reject), which is the
+ * one virtual mode looks at when completing the hcall.
+ *
+ * NOTE(review): the completion path runs the resend check on the
+ * caller's ICP, while need_resend was latched from the target ICP. For
+ * a remote @server these differ -- confirm whether the target's resend
+ * can be missed.
+ */
+int kvmppc_rm_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+                   unsigned long mfrr)
+{
+       union kvmppc_icp_state old_state, new_state;
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+       struct kvmppc_icp *icp, *this_icp = vcpu->arch.icp;
+       u32 reject;
+       bool resend;
+
+       if (!xics->real_mode)
+               return H_TOO_HARD;
+
+       /* Use our own ICP directly when we are the target */
+       if (vcpu->vcpu_id == server)
+               icp = this_icp;
+       else
+               icp = kvmppc_xics_find_server(vcpu->kvm, server);
+       if (!icp)
+               return H_PARAMETER;
+
+       /*
+        * ICP state: Set_MFRR
+        *
+        * If the CPPR is more favored than the new MFRR, then
+        * nothing needs to be done as there can be no XISR to
+        * reject.
+        *
+        * If the CPPR is less favored, then we might be replacing
+        * an interrupt, and thus need to possibly reject it as in
+        *
+        * ICP state: Check_IPI
+        */
+       do {
+               old_state = new_state = ACCESS_ONCE(icp->state);
+
+               /* Set_MFRR */
+               new_state.mfrr = mfrr;
+
+               /* Check_IPI */
+               reject = 0;
+               resend = false;
+               if (mfrr < new_state.cppr) {
+                       /*
+                        * The IPI only takes over the pending slot when
+                        * it is at least as favored as what is already
+                        * there. A more favored pending interrupt must
+                        * stay in place, otherwise it would be dropped
+                        * without ever being rejected back to its source.
+                        */
+                       if (mfrr <= new_state.pending_pri) {
+                               /* Displaced interrupt (0 if none) */
+                               reject = new_state.xisr;
+                               new_state.pending_pri = mfrr;
+                               new_state.xisr = XICS_IPI;
+                       }
+               }
+
+               /* MFRR made less favored than before and than the CPPR */
+               if (mfrr > old_state.mfrr && mfrr > new_state.cppr) {
+                       resend = new_state.need_resend;
+                       new_state.need_resend = 0;
+               }
+       } while (!icp_rm_try_update(icp, old_state, new_state));
+
+       /* Pass rejects to virtual mode (a displaced IPI needs no reject) */
+       if (reject && reject != XICS_IPI) {
+               this_icp->rm_action |= XICS_RM_REJECT;
+               this_icp->rm_reject = reject;
+       }
+
+       /* Pass resends to virtual mode */
+       if (resend)
+               this_icp->rm_action |= XICS_RM_CHECK_RESEND;
+
+       return check_too_hard(xics, this_icp);
+}
+
+/*
+ * Real-mode H_CPPR: load the CPPR of the calling VCPU with @cppr.
+ *
+ * Returns H_TOO_HARD if real mode is disabled or if a reject/resend was
+ * left for virtual mode, H_SUCCESS otherwise (including the case where
+ * the CPPR does not change).
+ */
+int kvmppc_rm_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+{
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+       struct kvmppc_icp *icp = vcpu->arch.icp;
+       union kvmppc_icp_state cur, next;
+       u32 reject;
+
+       if (!xics->real_mode)
+               return H_TOO_HARD;
+
+       /*
+        * ICP State: Set_CPPR
+        *
+        * Comparing against the current CPPR outside of the atomic
+        * update is safe: only the processor itself ever changes its
+        * own CPPR.
+        */
+       if (cppr > icp->state.cppr) {
+               /* Less favored than before: Down_CPPR */
+               icp_rm_down_cppr(xics, icp, cppr);
+               return check_too_hard(xics, icp);
+       }
+       if (cppr == icp->state.cppr)
+               return H_SUCCESS;
+
+       /*
+        * ICP State: Up_CPPR
+        *
+        * The priority is being raised, which may reject the pending
+        * interrupt (ICP State: Reject_Current). Our own interrupt line
+        * can be dropped now; the update sets it again if still needed.
+        */
+       icp_rm_clr_vcpu_irq(icp->vcpu);
+
+       for (;;) {
+               cur = next = ACCESS_ONCE(icp->state);
+
+               reject = 0;
+               next.cppr = cppr;
+
+               /* Pending interrupt no longer more favored than the CPPR */
+               if (cppr <= next.pending_pri) {
+                       reject = next.xisr;
+                       next.xisr = 0;
+                       next.pending_pri = 0xff;
+               }
+
+               if (icp_rm_try_update(icp, cur, next))
+                       break;
+       }
+
+       /* Rejecting a real source has to be done by virtual mode */
+       if (reject && reject != XICS_IPI) {
+               icp->rm_action |= XICS_RM_REJECT;
+               icp->rm_reject = reject;
+       }
+
+       return check_too_hard(xics, icp);
+}
+
+/*
+ * Real-mode H_EOI: end-of-interrupt for the source in the low 24 bits
+ * of @xirr, with the CPPR restored from the top byte of @xirr.
+ *
+ * Returns H_TOO_HARD if real mode is disabled or if a resend/reject was
+ * left for virtual mode, H_SUCCESS otherwise.
+ */
+int kvmppc_rm_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+{
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+       struct kvmppc_icp *icp = vcpu->arch.icp;
+       struct kvmppc_ics *ics;
+       u32 irq = xirr & 0x00ffffff;
+       u16 src;
+
+       if (!xics->real_mode)
+               return H_TOO_HARD;
+
+       /*
+        * ICP State: EOI
+        *
+        * If SW misuses EOI to make the CPPR more favored, no check for
+        * rejection of a pending interrupt is made: that is a SW error
+        * which PAPR specifies we do not have to handle.
+        *
+        * The CPPR update is ICP State: Down_CPPR, shared with H_CPPR.
+        * The EOI towards the ICS is handled after it.
+        */
+       icp_rm_down_cppr(xics, icp, xirr >> 24);
+
+       /* IPIs have no EOI, only real sources are looked up */
+       if (irq != XICS_IPI) {
+               /*
+                * A lockless peek at the ICS state is enough here.
+                * "Message" interrupts never have "asserted" set.
+                */
+               ics = kvmppc_xics_find_ics(xics, irq, &src);
+
+               /* Still asserted: resend it, disguised as a reject */
+               if (ics && ics->irq_state[src].asserted) {
+                       icp->rm_action |= XICS_RM_REJECT;
+                       icp->rm_reject = irq;
+               }
+       }
+
+       return check_too_hard(xics, icp);
+}
diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c
index ffcdb7e..3858c14 100644
--- a/arch/powerpc/kvm/book3s_xics.c
+++ b/arch/powerpc/kvm/book3s_xics.c
@@ -22,7 +22,7 @@
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
 
-#define MASKED 0xff
+#include "book3s_xics.h"
 
 #define XICS_DBG(fmt...) do { } while (0)
 //#define XICS_DBG(fmt...) do { trace_printk(fmt); } while (0)
@@ -64,93 +64,6 @@
  * - ioctl's to save/restore the entire state for snapshot & migration
  */
 
-#define KVMPPC_XICS_MAX_BUID   0xfff
-#define KVMPPC_XICS_IRQ_COUNT  0x1000
-#define KVMPPC_XICS_BUID_SHIFT 12
-#define KVMPPC_XICS_SRC_MASK   0xfff
-
-/* State for one irq in an ics */
-struct ics_irq_state {
-       u32 number;
-       u32 server;
-       u8  priority;
-       u8  saved_priority; /* currently unused */
-       u8  resend;
-       u8  masked_pending;
-       u8  asserted; /* Only for LSI */
-};
-
-#define ICP_RESEND_MAP_SIZE    \
-       ((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG)
-
-/* Atomic ICP state, updated with a single compare & swap */
-union kvmppc_icp_state {
-       unsigned long raw;
-       struct {
-               u8 out_ee : 1;
-               u8 need_resend : 1;
-               u8 cppr;
-               u8 mfrr;
-               u8 pending_pri;
-               u32 xisr;
-       };
-};
-
-struct kvmppc_icp {
-       struct kvm_vcpu *vcpu;
-       union kvmppc_icp_state state;
-       unsigned long resend_map[ICP_RESEND_MAP_SIZE];
-};
-
-struct kvmppc_ics {
-       struct mutex lock;
-       u16 buid;
-       u16 nr_irqs;
-       struct ics_irq_state irq_state[];
-};
-
-struct kvmppc_xics {
-       struct kvm *kvm;
-       struct dentry *dentry;
-       u32 max_buid;
-       struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID] */
-};
-
-static struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm, u32 nr)
-{
-       struct kvm_vcpu *vcpu = NULL;
-       int i;
-
-       kvm_for_each_vcpu(i, vcpu, kvm) {
-               if (nr == vcpu->vcpu_id)
-                       return vcpu->arch.icp;
-       }
-       return NULL;
-}
-
-static struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
-                                              u32 irq, u16 *source)
-{
-       u16 buid = irq >> KVMPPC_XICS_BUID_SHIFT;
-       u16 src = irq & KVMPPC_XICS_SRC_MASK;
-       struct kvmppc_ics *ics;
-
-       if (WARN_ON_ONCE(!buid || buid > KVMPPC_XICS_MAX_BUID)) {
-               XICS_DBG("kvmppc_xics_find_ics: irq %#x BUID out of range !\n",
-                        irq);
-               return NULL;
-       }
-       ics = xics->ics[buid - 1];
-       if (!ics)
-               return NULL;
-       if (src >= ics->nr_irqs)
-               return NULL;
-       if (source)
-               *source = src;
-       return ics;
-}
-
-
 /* -- ICS routines -- */
 
 static void icp_deliver_irq(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
@@ -311,8 +224,10 @@ static inline bool icp_try_update(struct kvmppc_icp *icp,
         * in Accept (H_XIRR) and Up_Cppr (H_XPPR).
         *
         * We also do not try to figure out whether the EE state has changed,
-        * we unconditionally set it if the new state calls for it for the
-        * same reason.
+        * we unconditionally set it if the new state calls for it. The reason
+        * for that is that we opportunistically remove the pending interrupt
+        * flag when raising CPPR, so we need to set it back here if an
+        * interrupt is still pending.
         */
        if (new.out_ee) {
                kvmppc_book3s_queue_irqprio(icp->vcpu,
@@ -574,7 +489,7 @@ static void icp_down_cppr(struct kvmppc_xics *xics, struct kvmppc_icp *icp,
                icp_check_resend(xics, icp);
 }
 
-static noinline unsigned long h_xirr(struct kvm_vcpu *vcpu)
+static noinline unsigned long kvmppc_h_xirr(struct kvm_vcpu *vcpu)
 {
        union kvmppc_icp_state old_state, new_state;
        struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -608,8 +523,8 @@ static noinline unsigned long h_xirr(struct kvm_vcpu *vcpu)
        return xirr;
 }
 
-static noinline int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
-                         unsigned long mfrr)
+static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
+                                unsigned long mfrr)
 {
         union kvmppc_icp_state old_state, new_state;
        struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -677,7 +592,7 @@ static noinline int h_ipi(struct kvm_vcpu *vcpu, unsigned long server,
        return H_SUCCESS;
 }
 
-static noinline void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
+static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
 {
        union kvmppc_icp_state old_state, new_state;
        struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
@@ -734,7 +649,7 @@ static noinline void h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr)
                icp_deliver_irq(xics, icp, reject);
 }
 
-static noinline int h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
+static noinline int kvmppc_h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
 {
        struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
        struct kvmppc_icp *icp = vcpu->arch.icp;
@@ -784,29 +699,54 @@ static noinline int h_eoi(struct kvm_vcpu *vcpu, unsigned long xirr)
        return H_SUCCESS;
 }
 
+/*
+ * Virtual-mode completion of a XICS hcall started in real mode.
+ *
+ * Carries out whatever the real-mode handler recorded in the calling
+ * VCPU's ICP (kick a VCPU, check for resends, deliver a rejected
+ * interrupt), clears the request and reports H_SUCCESS. @hcall is only
+ * used for the debug trace.
+ */
+static noinline int kvmppc_xics_rm_complete(struct kvm_vcpu *vcpu, u32 hcall)
+{
+       struct kvmppc_icp *icp = vcpu->arch.icp;
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
+
+       XICS_DBG("XICS_RM: H_%x completing, act: %x state: %lx tgt: %p\n",
+                hcall, icp->rm_action, icp->rm_dbgstate.raw, icp->rm_dbgtgt);
+
+       /* A VCPU that real mode could not poke directly */
+       if (icp->rm_action & XICS_RM_KICK_VCPU)
+               kvmppc_fast_vcpu_kick(icp->rm_kick_target);
+
+       /* A need_resend latched by the real-mode handler */
+       if (icp->rm_action & XICS_RM_CHECK_RESEND)
+               icp_check_resend(xics, icp);
+
+       /* A source that has to be delivered again */
+       if (icp->rm_action & XICS_RM_REJECT)
+               icp_deliver_irq(xics, icp, icp->rm_reject);
+
+       /* All requests consumed */
+       icp->rm_action = 0;
+
+       return H_SUCCESS;
+}
+
 int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req)
 {
+       struct kvmppc_xics *xics = vcpu->kvm->arch.xics;
        unsigned long res;
        int rc = H_SUCCESS;
 
        /* Check if we have an ICP */
-       if (!vcpu->arch.icp || !vcpu->kvm->arch.xics)
+       if (!xics || !vcpu->arch.icp)
                return H_HARDWARE;
 
+       /* Check for real mode returning too hard */
+       if (xics->real_mode)
+               return kvmppc_xics_rm_complete(vcpu, req);
+
        switch (req) {
        case H_XIRR:
-               res = h_xirr(vcpu);
+               res = kvmppc_h_xirr(vcpu);
                kvmppc_set_gpr(vcpu, 4, res);
                break;
        case H_CPPR:
-               h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
+               kvmppc_h_cppr(vcpu, kvmppc_get_gpr(vcpu, 4));
                break;
        case H_EOI:
-               rc = h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
+               rc = kvmppc_h_eoi(vcpu, kvmppc_get_gpr(vcpu, 4));
                break;
        case H_IPI:
-               rc = h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
-                          kvmppc_get_gpr(vcpu, 5));
+               rc = kvmppc_h_ipi(vcpu, kvmppc_get_gpr(vcpu, 4),
+                                 kvmppc_get_gpr(vcpu, 5));
                break;
        }
 
@@ -1004,6 +944,17 @@ static int kvm_vm_ioctl_create_icp(struct kvm *kvm,
        kvm->arch.xics = xics;
        xics_debugfs_init(xics);
 
+#ifdef CONFIG_KVM_BOOK3S_64_HV
+       if (cpu_has_feature(CPU_FTR_ARCH_206)) {
+               /*
+                * Enable real mode support unless userspace asked for it
+                * to be disabled. The parentheses matter: "!" binds
+                * tighter than "&", so without them any non-zero flags
+                * value would turn real mode off, not just the
+                * KVM_ICP_FLAG_NOREALMODE bit.
+                */
+               if (!(args->icp.flags & KVM_ICP_FLAG_NOREALMODE))
+                       xics->real_mode = true;
+#ifdef DEBUG_REALMODE
+               /* Debug build: every real-mode hcall reports H_TOO_HARD */
+               xics->real_mode_dbg = true;
+#endif
+       }
+#endif /* CONFIG_KVM_BOOK3S_64_HV */
+
 out:
        mutex_unlock(&kvm->lock);
        return rc;
diff --git a/arch/powerpc/kvm/book3s_xics.h b/arch/powerpc/kvm/book3s_xics.h
new file mode 100644
index 0000000..951eacb
--- /dev/null
+++ b/arch/powerpc/kvm/book3s_xics.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2012 Michael Ellerman, IBM Corporation.
+ * Copyright 2012 Benjamin Herrenschmidt, IBM Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#ifndef _KVM_PPC_BOOK3S_XICS_H
+#define _KVM_PPC_BOOK3S_XICS_H
+
+#define KVMPPC_XICS_MAX_BUID   0xfff
+#define KVMPPC_XICS_IRQ_COUNT  0x1000
+#define KVMPPC_XICS_BUID_SHIFT 12
+#define KVMPPC_XICS_SRC_MASK   0xfff
+
+#define MASKED 0xff
+
+/*
+ * State for one irq in an ics.
+ *
+ * Entries live in the irq_state[] array of struct kvmppc_ics. The
+ * real-mode H_EOI handler reads "asserted" without taking any lock.
+ */
+struct ics_irq_state {
+       u32 number;             /* presumably the global irq number -- TODO confirm */
+       u32 server;             /* presumably the target server number -- TODO confirm */
+       u8  priority;
+       u8  saved_priority; /* currently unused */
+       u8  resend;
+       u8  masked_pending;
+       u8  asserted; /* Only for LSI; if still set at EOI the irq is sent again */
+};
+
+#define ICP_RESEND_MAP_SIZE    \
+       ((KVMPPC_XICS_MAX_BUID + BITS_PER_LONG - 1) / BITS_PER_LONG)
+
+/*
+ * Atomic ICP state, updated with a single compare & swap.
+ *
+ * The named fields overlay "raw", which is the value handed to the
+ * compare & swap, so the layout must not be changed. For the priority
+ * fields a numerically lower value is more favored.
+ */
+union kvmppc_icp_state {
+       unsigned long raw;      /* whole state as one word */
+       struct {
+               u8 out_ee : 1;  /* output line: xisr && pending_pri < cppr */
+               u8 need_resend : 1; /* latched and cleared by the state updates */
+               u8 cppr;        /* current processor priority */
+               u8 mfrr;        /* IPI priority, written by H_IPI */
+               u8 pending_pri; /* priority of xisr, 0xff once xisr is cleared */
+               u32 xisr;       /* pending source, 0 when nothing is pending */
+       };
+};
+
+struct kvmppc_icp {     /* per-VCPU interrupt presentation state */
+       struct kvm_vcpu *vcpu;  /* VCPU this ICP presents interrupts to */
+       union kvmppc_icp_state state;   /* only changed by compare & swap */
+       unsigned long resend_map[ICP_RESEND_MAP_SIZE];
+
+       /* Real mode might find something too hard, here's the action
+        * it might request from virtual mode. The bits are or'ed into
+        * rm_action by the real-mode handlers and cleared again by the
+        * virtual-mode completion.
+        */
+#define XICS_RM_KICK_VCPU      0x1     /* kick rm_kick_target */
+#define XICS_RM_CHECK_RESEND   0x2     /* run the resend check */
+#define XICS_RM_REJECT         0x4     /* deliver rm_reject again */
+       u32 rm_action;
+       struct kvm_vcpu *rm_kick_target;
+       u32  rm_reject;
+
+       /* Debug stuff for real mode: last state installed from real
+        * mode by this VCPU, and the VCPU owning the ICP it updated
+        */
+       union kvmppc_icp_state rm_dbgstate;
+       struct kvm_vcpu *rm_dbgtgt;
+};
+
+struct kvmppc_ics {     /* one interrupt source controller, found by BUID */
+       struct mutex lock;
+       u16 buid;
+       u16 nr_irqs;    /* number of entries in irq_state[] */
+       struct ics_irq_state irq_state[];       /* indexed by source number */
+};
+
+struct kvmppc_xics {    /* per-VM XICS state */
+       struct kvm *kvm;
+       struct dentry *dentry;
+       u32 max_buid;
+       bool real_mode;         /* real-mode hcall handlers are in use */
+       bool real_mode_dbg;     /* debug: real-mode handlers always return H_TOO_HARD */
+       struct kvmppc_ics *ics[KVMPPC_XICS_MAX_BUID]; /* [1...MAX_BUID], stored at index buid - 1 */
+};
+
+/*
+ * Look up the ICP of server number @nr in @kvm.
+ *
+ * Walks the VCPUs of the VM and returns the ICP of the first one whose
+ * vcpu_id equals @nr, or NULL when no VCPU matches.
+ */
+static inline struct kvmppc_icp *kvmppc_xics_find_server(struct kvm *kvm,
+                                                        u32 nr)
+{
+       struct kvm_vcpu *vcpu = NULL;
+       int i;
+
+       kvm_for_each_vcpu(i, vcpu, kvm) {
+               if (vcpu->vcpu_id != nr)
+                       continue;
+               return vcpu->arch.icp;
+       }
+
+       return NULL;
+}
+
+/*
+ * Find the ICS that owns interrupt number @irq.
+ *
+ * @xics:   XICS instance to search
+ * @irq:    interrupt number; the bits above KVMPPC_XICS_BUID_SHIFT are
+ *          the BUID, the low bits the source number within that ICS
+ * @source: if not NULL, receives the source number on success
+ *
+ * Returns the ICS, or NULL when the BUID is 0 or above
+ * KVMPPC_XICS_MAX_BUID (with a one-time warning), when no ICS exists
+ * for that BUID, or when the source number is beyond its nr_irqs.
+ *
+ * NOTE(review): this helper is also called from the real-mode H_EOI
+ * handler with a guest-supplied irq; confirm that WARN_ON_ONCE() is
+ * safe to hit in that context.
+ */
+static inline struct kvmppc_ics *kvmppc_xics_find_ics(struct kvmppc_xics *xics,
+                                                     u32 irq, u16 *source)
+{
+       /*
+        * Keep the BUID at full width: in a u16 the top bits of @irq
+        * would be dropped before the range check, letting an
+        * out-of-range irq alias a valid BUID.
+        */
+       u32 buid = irq >> KVMPPC_XICS_BUID_SHIFT;
+       u16 src = irq & KVMPPC_XICS_SRC_MASK;
+       struct kvmppc_ics *ics;
+
+       if (WARN_ON_ONCE(!buid || buid > KVMPPC_XICS_MAX_BUID))
+               return NULL;
+
+       /* BUIDs start at 1, the array at 0 */
+       ics = xics->ics[buid - 1];
+       if (!ics)
+               return NULL;
+       if (src >= ics->nr_irqs)
+               return NULL;
+       if (source)
+               *source = src;
+       return ics;
+}
+
+
+#endif /* _KVM_PPC_BOOK3S_XICS_H */
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to