[PATCH 09/23] Add 970.c

Alexander Graf Tue, 07 Jul 2009 07:18:06 -0700

This adds the 970 core handling file. Here everything that is generic to
desktop PowerPC cores is handled, including interrupt injections, MSR settings,
etc.


It basically takes over the same role as booke.c for embedded PowerPCs.

Signed-off-by: Alexander Graf <ag...@suse.de>
---
 arch/powerpc/kvm/970.c |  947 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 947 insertions(+), 0 deletions(-)
 create mode 100644 arch/powerpc/kvm/970.c

diff --git a/arch/powerpc/kvm/970.c b/arch/powerpc/kvm/970.c
new file mode 100644
index 0000000..62591e3
--- /dev/null
+++ b/arch/powerpc/kvm/970.c
@@ -0,0 +1,947 @@
+/*
+ * Copyright (C) 2009. SUSE Linux Products GmbH. All rights reserved.
+ *
+ * Authors:
+ *    Alexander Graf <ag...@suse.de>
+ *    Kevin Wolf <m...@kevin-wolf.de>
+ *
+ * Description:
+ * This file is derived from arch/powerpc/kvm/44x.c,
+ * by Hollis Blanchard <holl...@us.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License, version 2, as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/kvm_host.h>
+#include <linux/err.h>
+
+#include <asm/reg.h>
+#include <asm/cputable.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+#include <asm/uaccess.h>
+#include <asm/io.h>
+#include <asm/kvm_ppc.h>
+#include <asm/kvm_970.h>
+#include <asm/mmu_context.h>
+#include <linux/sched.h>
+#include <linux/vmalloc.h>
+
+/* Expands to two initializers for a debugfs_entries[] row: the byte offset of
+ * stat.x inside struct kvm_vcpu, followed by KVM_STAT_VCPU. */
+#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU
+
+// #define EXIT_DEBUG
+// #define EXIT_DEBUG_SIMPLE
+
+// XXX we should be able to ld/st guest memory as is, but somehow that breaks
+// still
+#define NO_FAST_LD_ST
+
+/* Statistics table: each row pairs a name string with a counter in
+ * vcpu->stat (via VCPU_STAT). The list is terminated by a { NULL } entry. */
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+       { "exits",       VCPU_STAT(sum_exits) },
+       { "mmio",        VCPU_STAT(mmio_exits) },
+       { "sig",         VCPU_STAT(signal_exits) },
+       { "sysc",        VCPU_STAT(syscall_exits) },
+       { "inst_emu",    VCPU_STAT(emulated_inst_exits) },
+       { "dec",         VCPU_STAT(dec_exits) },
+       { "ext_intr",    VCPU_STAT(ext_intr_exits) },
+       { "queue_intr",  VCPU_STAT(queue_intr) },
+       { "halt_wakeup", VCPU_STAT(halt_wakeup) },
+       { "pf_storage",  VCPU_STAT(pf_storage) },
+       { "sp_storage",  VCPU_STAT(sp_storage) },
+       { "pf_instruc",  VCPU_STAT(pf_instruc) },
+       { "sp_instruc",  VCPU_STAT(sp_instruc) },
+       { "ld",          VCPU_STAT(ld) },
+       { "ld_slow",     VCPU_STAT(ld_slow) },
+       { "st",          VCPU_STAT(st) },
+       { "st_slow",     VCPU_STAT(st_slow) },
+       { NULL }
+};
+
+/* Host debug state hook; this implementation does nothing. */
+void kvmppc_core_load_host_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+/* Guest debug state hook; this implementation does nothing. */
+void kvmppc_core_load_guest_debugstate(struct kvm_vcpu *vcpu)
+{
+}
+
+/* Switch back to the context that was saved by kvmppc_core_vcpu_put().
+ * The cpu argument is not used. */
+void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+{
+       struct kvmppc_vcpu_970 *vcpu_970 = to_970(vcpu);
+
+       kvmppc_switch_context(vcpu, vcpu_970->old_context);
+}
+
+/* Remember the currently active context in old_context, then switch to
+ * CONTEXT_HOST. */
+void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_970 *vcpu_970 = to_970(vcpu);
+
+       vcpu_970->old_context = vcpu_970->context;
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+}
+
+/*
+ * Set the guest MSR and derive the shadow MSR from it.
+ *
+ * The requested value is filtered through the per-vcpu msr_mask first. When
+ * MSR_WE or MSR_POW is set and no exception is pending, the vcpu is blocked.
+ * If either translation bit (MSR_IR, MSR_DR) differs from its previous value,
+ * kvmppc_reload_context() is called.
+ */
+void kvmppc_set_msr(struct kvm_vcpu *vcpu, u64 msr)
+{
+       const ulong prev_msr = vcpu->arch.msr;
+
+#ifdef EXIT_DEBUG
+       printk(KERN_INFO "KVM: Set MSR to 0x%llx\n", msr);
+#endif
+       msr &= to_970(vcpu)->msr_mask;
+       vcpu->arch.msr = msr;
+
+       /* Only this subset of bits is carried over into the shadow MSR */
+       vcpu->arch.shadow_msr = (msr | MSR_USER32) &
+                               (MSR_VEC | MSR_VSX | MSR_FP | MSR_FE0 |
+                                MSR_USER64 | MSR_SE | MSR_BE | MSR_DE |
+                                MSR_FE1);
+
+       if ((msr & (MSR_WE|MSR_POW)) && !vcpu->arch.pending_exceptions) {
+               kvm_vcpu_block(vcpu);
+               vcpu->stat.halt_wakeup++;
+       }
+
+       /* Any toggled translation bit requires a context reload */
+       if ((vcpu->arch.msr ^ prev_msr) & (MSR_IR|MSR_DR))
+               kvmppc_reload_context(vcpu);
+}
+
+/*
+ * Redirect the guest to an interrupt vector.
+ *
+ * Saves pc into srr0 and (msr | flags) into srr1, points pc at hior + vec and
+ * lets the MMU backend's reset_msr() callback adjust the MSR.
+ */
+void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags)
+{
+       struct kvmppc_vcpu_970 *vcpu_970 = to_970(vcpu);
+
+       /* Save the interrupted state before touching pc */
+       vcpu->arch.srr1 = vcpu->arch.msr | flags;
+       vcpu->arch.srr0 = vcpu->arch.pc;
+
+       vcpu->arch.pc = vcpu_970->hior + vec;
+       vcpu->arch.mmu.reset_msr(vcpu);
+}
+
+/* Map an interrupt vector offset to its bit number in pending_exceptions.
+ * Vectors without an entry here map to PPC970_IRQPRIO_MAX. */
+static unsigned int kvmppc_970_vec_to_irqprio(unsigned int vec)
+{
+       switch (vec) {
+       case 0x100:
+               return PPC970_IRQPRIO_SYSTEM_RESET;
+       case 0x200:
+               return PPC970_IRQPRIO_MACHINE_CHECK;
+       case 0x300:
+               return PPC970_IRQPRIO_DATA_STORAGE;
+       case 0x380:
+               return PPC970_IRQPRIO_DATA_SEGMENT;
+       case 0x400:
+               return PPC970_IRQPRIO_INST_STORAGE;
+       case 0x480:
+               return PPC970_IRQPRIO_INST_SEGMENT;
+       case 0x500:
+               return PPC970_IRQPRIO_EXTERNAL;
+       case 0x600:
+               return PPC970_IRQPRIO_ALIGNMENT;
+       case 0x700:
+               return PPC970_IRQPRIO_PROGRAM;
+       case 0x800:
+               return PPC970_IRQPRIO_FP_UNAVAIL;
+       case 0x900:
+               return PPC970_IRQPRIO_DECREMENTER;
+       case 0xc00:
+               return PPC970_IRQPRIO_SYSCALL;
+       case 0xd00:
+               return PPC970_IRQPRIO_DEBUG;
+       case 0xf20:
+               return PPC970_IRQPRIO_ALTIVEC;
+       case 0xf40:
+               return PPC970_IRQPRIO_VSX;
+       default:
+               return PPC970_IRQPRIO_MAX;
+       }
+}
+
+/* Mark the interrupt belonging to vector vec as pending for this vcpu and
+ * bump the queue_intr statistic. */
+void kvmppc_970_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec)
+{
+       vcpu->stat.queue_intr++;
+       set_bit(kvmppc_970_vec_to_irqprio(vec),
+               &vcpu->arch.pending_exceptions);
+#ifdef EXIT_DEBUG
+       printk(KERN_INFO "Queueing interrupt %x\n", vec);
+#endif
+}
+
+
+/* Queue a program interrupt (PPC970_INTERRUPT_PROGRAM) for the vcpu. */
+void kvmppc_core_queue_program(struct kvm_vcpu *vcpu)
+{
+       kvmppc_970_queue_irqprio(vcpu, PPC970_INTERRUPT_PROGRAM);
+}
+
+/* Queue a decrementer interrupt (PPC970_INTERRUPT_DECREMENTER) for the vcpu. */
+void kvmppc_core_queue_dec(struct kvm_vcpu *vcpu)
+{
+       kvmppc_970_queue_irqprio(vcpu, PPC970_INTERRUPT_DECREMENTER);
+}
+
+/*
+ * Report whether a decrementer interrupt is queued for the vcpu.
+ *
+ * kvmppc_970_queue_irqprio() records a decrementer as bit
+ * PPC970_IRQPRIO_DECREMENTER of pending_exceptions, so that is the bit that
+ * has to be tested here (not a value derived from the vector offset).
+ *
+ * Returns non-zero if the decrementer is pending, 0 otherwise.
+ */
+int kvmppc_core_pending_dec(struct kvm_vcpu *vcpu)
+{
+       return test_bit(PPC970_IRQPRIO_DECREMENTER,
+                       &vcpu->arch.pending_exceptions);
+}
+
+/* Queue an external interrupt (PPC970_INTERRUPT_EXTERNAL) for the vcpu.
+ * The irq argument is not looked at. */
+void kvmppc_core_queue_external(struct kvm_vcpu *vcpu,
+                                struct kvm_interrupt *irq)
+{
+       kvmppc_970_queue_irqprio(vcpu, PPC970_INTERRUPT_EXTERNAL);
+}
+
+/* Look up the interrupt vector that belongs to a pending-exception priority.
+ * Returns false, leaving *vec untouched, when the priority is not known. */
+static bool kvmppc_970_irqprio_to_vec(unsigned int priority, int *vec)
+{
+       switch (priority) {
+       case PPC970_IRQPRIO_DECREMENTER:
+               *vec = PPC970_INTERRUPT_DECREMENTER;
+               break;
+       case PPC970_IRQPRIO_EXTERNAL:
+               *vec = PPC970_INTERRUPT_EXTERNAL;
+               break;
+       case PPC970_IRQPRIO_SYSTEM_RESET:
+               *vec = PPC970_INTERRUPT_SYSTEM_RESET;
+               break;
+       case PPC970_IRQPRIO_MACHINE_CHECK:
+               *vec = PPC970_INTERRUPT_MACHINE_CHECK;
+               break;
+       case PPC970_IRQPRIO_DATA_STORAGE:
+               *vec = PPC970_INTERRUPT_DATA_STORAGE;
+               break;
+       case PPC970_IRQPRIO_INST_STORAGE:
+               *vec = PPC970_INTERRUPT_INST_STORAGE;
+               break;
+       case PPC970_IRQPRIO_DATA_SEGMENT:
+               *vec = PPC970_INTERRUPT_DATA_SEGMENT;
+               break;
+       case PPC970_IRQPRIO_INST_SEGMENT:
+               *vec = PPC970_INTERRUPT_INST_SEGMENT;
+               break;
+       case PPC970_IRQPRIO_ALIGNMENT:
+               *vec = PPC970_INTERRUPT_ALIGNMENT;
+               break;
+       case PPC970_IRQPRIO_PROGRAM:
+               *vec = PPC970_INTERRUPT_PROGRAM;
+               break;
+       case PPC970_IRQPRIO_VSX:
+               *vec = PPC970_INTERRUPT_VSX;
+               break;
+       case PPC970_IRQPRIO_ALTIVEC:
+               *vec = PPC970_INTERRUPT_ALTIVEC;
+               break;
+       case PPC970_IRQPRIO_FP_UNAVAIL:
+               *vec = PPC970_INTERRUPT_FP_UNAVAIL;
+               break;
+       case PPC970_IRQPRIO_SYSCALL:
+               *vec = PPC970_INTERRUPT_SYSCALL;
+               break;
+       case PPC970_IRQPRIO_DEBUG:
+               *vec = PPC970_INTERRUPT_TRACE;
+               break;
+       case PPC970_IRQPRIO_PERFORMANCE_MONITOR:
+               *vec = PPC970_INTERRUPT_PERFMON;
+               break;
+       default:
+               return false;
+       }
+
+       return true;
+}
+
+/*
+ * Try to inject the interrupt with the given priority into the guest.
+ *
+ * Decrementer and external interrupts are only injected while the guest has
+ * MSR_EE set; every other known priority is injected unconditionally. An
+ * unknown priority is logged and not delivered.
+ *
+ * Returns non-zero if the interrupt was injected, 0 if it was held back.
+ */
+int kvmppc_970_irqprio_deliver(struct kvm_vcpu *vcpu, unsigned int priority)
+{
+       int deliver = 1;
+       int vec = 0;
+
+       if (!kvmppc_970_irqprio_to_vec(priority, &vec)) {
+               printk(KERN_ERR "KVM: Unknown interrupt: 0x%x\n", priority);
+               return 0;
+       }
+
+       if (priority == PPC970_IRQPRIO_DECREMENTER ||
+           priority == PPC970_IRQPRIO_EXTERNAL)
+               deliver = vcpu->arch.msr & MSR_EE;
+
+       if (deliver)
+               kvmppc_inject_interrupt(vcpu, vec, 0ULL);
+
+       return deliver;
+}
+
+/*
+ * Inject at most one pending interrupt into the guest.
+ *
+ * Walks the set bits of pending_exceptions from the lowest bit number upwards
+ * and stops at the first one kvmppc_970_irqprio_deliver() accepts; that bit is
+ * then cleared. Bits that cannot be delivered right now stay pending.
+ *
+ * find_first_bit() is used instead of __ffs() because __ffs() is undefined
+ * for an all-zero word, which is the common case here, and the same bit count
+ * bounds both the initial search and the iteration.
+ */
+void kvmppc_core_deliver_interrupts(struct kvm_vcpu *vcpu)
+{
+       unsigned long *pending = &vcpu->arch.pending_exceptions;
+       const unsigned int nbits = BITS_PER_BYTE * sizeof(*pending);
+       unsigned int priority;
+
+#ifdef EXIT_DEBUG
+       if (vcpu->arch.pending_exceptions)
+               printk(KERN_EMERG "KVM: Check pending: %lx\n", vcpu->arch.pending_exceptions);
+#endif
+       for (priority = find_first_bit(pending, nbits);
+            priority < nbits;
+            priority = find_next_bit(pending, nbits, priority + 1)) {
+               if (kvmppc_970_irqprio_deliver(vcpu, priority)) {
+                       clear_bit(priority, pending);
+                       break;
+               }
+       }
+}
+
+/*
+ * Set the guest PVR and pick the matching MMU model.
+ *
+ * PVR values in [0x330000, 0x70330000) select the 970 MMU with
+ * hior = 0xfff00000 and a full 64-bit MSR mask; everything else selects the
+ * 74xx MMU with hior = 0 and a 32-bit MSR mask. Afterwards the
+ * PPC970_HFLAG_DCBZ32 flag is recomputed.
+ */
+void kvmppc_set_pvr(struct kvm_vcpu *vcpu, u64 pvr)
+{
+       struct kvmppc_vcpu_970 *vcpu_970 = to_970(vcpu);
+
+       vcpu->arch.pvr = pvr;
+       if ((pvr < 0x330000) || (pvr >= 0x70330000)) {
+               kvmppc_mmu_74xx_init(vcpu);
+               vcpu_970->hior = 0;
+               vcpu_970->msr_mask = 0xffffffffULL;
+       } else {
+               kvmppc_mmu_970_init(vcpu);
+               vcpu_970->hior = 0xfff00000;
+               vcpu_970->msr_mask = 0xffffffffffffffffULL;
+       }
+
+       /* If we are in hypervisor level, we can set DCBZ to 32 bytes length */
+       vcpu->arch.hflags &= ~PPC970_HFLAG_DCBZ32;
+       if (vcpu->arch.mmu.is_dcbz32(vcpu) && (mfmsr() & MSR_HV))
+               vcpu->arch.hflags |= PPC970_HFLAG_DCBZ32;
+}
+
+/* PPC64 and PPC32 have different semantics of dcbz. In order to run PPC32 code
+ * properly on PPC64, we have to make sure we trap dcbz to emulate the correct
+ * PPC32 behavior.
+ *
+ * The PPC64 inventors also realized this case and implemented a special bit
+ * in the HID5 register, which is a hypervisor resource. Thus we can't use it.
+ *
+ * My approach here is to patch the dcbz instruction on executing pages.
+ *
+ * The 4k page that pte->raddr points into is copied to a scratch buffer,
+ * every word matching INS_DCBZ gets bit 0x8 cleared, and the page is written
+ * back only if at least one word was changed. The function switches to
+ * CONTEXT_HOST and gives up silently if the page has no valid host address,
+ * the scratch buffer cannot be allocated, or the page cannot be read.
+ */
+static void kvmppc_patch_dcbz(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte)
+{
+       bool touched = false;
+       hva_t hpage;
+       u32 *page;
+       int i;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+
+       hpage = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+       if (kvm_is_error_hva(hpage))
+               return;
+
+       hpage |= pte->raddr & ~PAGE_MASK;
+       hpage &= ~0xFFFULL;
+
+       page = vmalloc(HW_PAGE_SIZE);
+       if (!page)
+               return; /* no scratch buffer - leave the page unpatched */
+
+       if (copy_from_user(page, (void __user *)hpage, HW_PAGE_SIZE))
+               goto out;
+
+       for (i = 0; i < HW_PAGE_SIZE / 4; i++)
+               if ((page[i] & 0xff0007ff) == INS_DCBZ) {
+                       /* reserved instruction, so we trap */
+                       page[i] &= 0xfffffff7;
+                       touched = true;
+               }
+
+       /* Best effort: a failed write-back is not reported to the caller */
+       if (touched)
+               copy_to_user((void __user *)hpage, page, HW_PAGE_SIZE);
+
+out:
+       vfree(page);
+}
+
+/*
+ * Translate a guest effective address into a kvmppc_pte.
+ *
+ * data selects which MSR translation bit is consulted (MSR_DR for data,
+ * MSR_IR for instructions). With translation on, the MMU backend's xlate()
+ * callback does the work after switching to CONTEXT_HOST. With translation
+ * off, the pte is filled in directly: raddr is the low 32 bits of eaddr and
+ * all access permissions are granted.
+ *
+ * Returns the backend's xlate() result, or 0 in the untranslated case.
+ */
+static int kvmppc_xlate(struct kvm_vcpu *vcpu, ulong eaddr, bool data,
+                        struct kvmppc_pte *pte)
+{
+       int relocated = (vcpu->arch.msr & (data ? MSR_DR : MSR_IR));
+       int r;
+
+       if (relocated) {
+               kvmppc_switch_context(vcpu, CONTEXT_HOST);
+               r = vcpu->arch.mmu.xlate(vcpu, eaddr, pte, data);
+       } else {
+               pte->eaddr = eaddr;
+               pte->raddr = eaddr & 0xffffffff;
+               pte->vpage = eaddr >> 12;
+               /* NOTE(review): there are no break statements below, so the
+                * cases fall through and the VSID_REAL* tags accumulate
+                * (case 0 ORs in all three). This looks unintended - confirm
+                * against the other users of VSID_REAL* before changing. */
+               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+               case 0:
+                       pte->vpage |= VSID_REAL;
+               case MSR_DR:
+                       pte->vpage |= VSID_REAL_DR;
+               case MSR_IR:
+                       pte->vpage |= VSID_REAL_IR;
+               }
+               pte->may_read = true;
+               pte->may_write = true;
+               pte->may_execute = true;
+               r = 0;
+       }
+
+       return r;
+}
+
+/*
+ * Turn a translated pte into a host virtual address.
+ *
+ * read selects which permission is required: may_read for loads, may_write
+ * for stores. On a permission failure or when the guest frame has no valid
+ * host address, PAGE_OFFSET is returned as the error value.
+ */
+static hva_t kvmppc_pte_to_hva(struct kvm_vcpu *vcpu, struct kvmppc_pte *pte,
+                              bool read)
+{
+       bool allowed = read ? pte->may_read : pte->may_write;
+       hva_t hva;
+
+       // XXX PAGE_OFFSET is bad_hva(). Maybe we should rather export it?
+       if (!allowed)
+               return PAGE_OFFSET;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+       hva = gfn_to_hva(vcpu->kvm, pte->raddr >> PAGE_SHIFT);
+       if (kvm_is_error_hva(hva))
+               return PAGE_OFFSET;
+
+       return hva | (pte->raddr & ~PAGE_MASK);
+}
+
+/*
+ * Store size bytes from ptr to the guest effective address eaddr.
+ *
+ * While NO_FAST_LD_ST is defined the direct-access fast path is skipped and
+ * every store goes through kvmppc_xlate() + kvmppc_pte_to_hva().
+ *
+ * Returns 0 on success, -ENOENT if the address cannot be translated, is not
+ * writable, or the copy fails.
+ */
+int kvmppc_st(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr)
+{
+       struct kvmppc_pte xlated;
+       hva_t uaddr = eaddr;
+       int left;
+
+       vcpu->stat.st++;
+
+#ifdef NO_FAST_LD_ST
+       goto heavy_st;
+#endif
+       /* If it's still mapped, let's use the mapping! */
+       if (((eaddr & ~0xFFFULL) != (vcpu->arch.pc & ~0xFFFULL)) &&
+           ((eaddr & ~0xFFFULL) != (vcpu->arch.dear & ~0xFFFULL)))
+               goto heavy_st;
+
+       if (uaddr >= PAGE_OFFSET)
+               uaddr |= 0xe000000000000000ULL;
+
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+       left = copy_to_user((void __user *)uaddr, ptr, size);
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST_END);
+
+       if (!left)
+               return 0;
+
+heavy_st:
+
+       vcpu->stat.st_slow++;
+
+       if (kvmppc_xlate(vcpu, eaddr, false, &xlated))
+               return -ENOENT;
+
+       uaddr = kvmppc_pte_to_hva(vcpu, &xlated, false);
+       if (kvm_is_error_hva(uaddr))
+               return -ENOENT;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+
+       if (copy_to_user((void __user *)uaddr, ptr, size)) {
+               printk(KERN_INFO "kvmppc_st at 0x%lx failed\n", uaddr);
+               return -ENOENT;
+       }
+
+// XXX If we'd map the SLB entry + page for missed addreses, we could almost
+//     always use copy_to_user
+#if 0
+       // We don't want to go through the hpte again, so let's map the page
+       kvmppc_mmu_map_page(vcpu, &xlated);
+#endif
+
+       return 0;
+}
+
+/*
+ * Load size bytes from the guest effective address eaddr into ptr.
+ *
+ * data tells the translation whether this is a data access (true) or an
+ * instruction fetch (false). While NO_FAST_LD_ST is defined the direct-access
+ * fast path is skipped and every load goes through kvmppc_xlate() +
+ * kvmppc_pte_to_hva().
+ *
+ * Returns 0 on success, -ENOENT if the address cannot be translated, is not
+ * readable, or the copy fails.
+ */
+int kvmppc_ld(struct kvm_vcpu *vcpu, ulong eaddr, int size, void *ptr,
+                     bool data)
+{
+       struct kvmppc_pte xlated;
+       hva_t uaddr = eaddr;
+       int left = 0;
+
+       vcpu->stat.ld++;
+
+#ifdef NO_FAST_LD_ST
+       goto heavy_ld;
+#endif
+
+       /* If it's still mapped, let's use the mapping! */
+       if (uaddr >= PAGE_OFFSET)
+               uaddr |= 0xe000000000000000ULL;
+
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+       left = copy_from_user(ptr, (void __user *)uaddr, size);
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST_END);
+       if (!left)
+               return 0;
+
+#ifdef NO_FAST_LD_ST
+heavy_ld:
+#endif
+
+       vcpu->stat.ld_slow++;
+
+       if (kvmppc_xlate(vcpu, eaddr, data, &xlated))
+               return -ENOENT;
+
+       uaddr = kvmppc_pte_to_hva(vcpu, &xlated, true);
+       if (kvm_is_error_hva(uaddr))
+               return -ENOENT;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+
+       if (copy_from_user(ptr, (void __user *)uaddr, size)) {
+               printk(KERN_INFO "kvmppc_ld at 0x%lx failed\n", uaddr);
+               return -ENOENT;
+       }
+
+       return 0;
+}
+
+/* Switch to CONTEXT_HOST and return kvm_is_visible_gfn() for the given guest
+ * frame number. */
+static int kvmppc_visible_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
+{
+       int visible;
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+       visible = kvm_is_visible_gfn(vcpu->kvm, gfn);
+
+       return visible;
+}
+
+/*
+ * Handle a storage fault taken on behalf of the guest.
+ *
+ * eaddr is the faulting effective address and vec the interrupt vector
+ * (PPC970_INTERRUPT_DATA_STORAGE selects the data path, anything else is
+ * handled as an instruction fault). Depending on the outcome of the guest
+ * translation the fault is
+ *   - reflected to the guest (-ENOENT: page not found, -EPERM: protection
+ *     fault, -EINVAL: segment fault at vec + 0x80),
+ *   - resolved by mapping the page on the host, or
+ *   - treated as MMIO and passed to kvmppc_emulate_mmio().
+ *
+ * Returns RESUME_GUEST, or the (non-_NV) result of the MMIO emulation.
+ */
+int kvmppc_handle_pagefault(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                           ulong eaddr, int vec)
+{
+       bool data = (vec == PPC970_INTERRUPT_DATA_STORAGE);
+       int r = RESUME_GUEST;
+       int relocated;
+       int page_found = 0;
+       struct kvmppc_pte pte;
+       bool is_mmio = false;
+
+       relocated = vcpu->arch.msr & (data ? MSR_DR : MSR_IR);
+
+       /* Resolve real address if translation turned on */
+       if (relocated) {
+               ulong vp = vcpu->arch.mmu.ea_to_vp(vcpu, eaddr, data);
+
+               /* If we just hit that address on last MMIO, let's
+                * reuse the values we found out there so we don't
+                * need to read the guest ptes */
+               if (vp && (vp == vcpu->arch.last_mmio)) {
+                       pte.raddr = (vcpu->arch.last_mmio_raddr & ~0xFFFULL) |
+                                   (eaddr & 0xFFFULL);
+                       /* The MMIO branch below stores pte.vpage back into
+                        * last_mmio, so it must be defined on this path too;
+                        * otherwise the cache is overwritten with garbage. */
+                       pte.vpage = vp;
+                       is_mmio = true;
+               } else {
+                       page_found = vcpu->arch.mmu.xlate(vcpu, eaddr, &pte,
+                                                         data);
+               }
+       } else {
+               pte.may_execute = true;
+               pte.may_read = true;
+               pte.may_write = true;
+               pte.raddr = eaddr & 0xffffffff;
+               pte.eaddr = eaddr;
+               pte.vpage = eaddr >> 12;
+               /* NOTE(review): no break statements - the cases fall through
+                * and the VSID_REAL* tags accumulate. Behavior kept as is;
+                * confirm against the other users of VSID_REAL*. */
+               switch (vcpu->arch.msr & (MSR_DR|MSR_IR)) {
+               case 0:
+                       pte.vpage |= VSID_REAL;
+               case MSR_DR:
+                       pte.vpage |= VSID_REAL_DR;
+               case MSR_IR:
+                       pte.vpage |= VSID_REAL_IR;
+               }
+       }
+
+       if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+          (!(vcpu->arch.hflags & PPC970_HFLAG_DCBZ32))) {
+               /*
+                * If we do the dcbz hack, we have to NX on every execution,
+                * so we can patch the executing code. This renders our guest
+                * NX-less.
+                */
+               pte.may_execute = !data;
+       }
+
+       if (page_found == -ENOENT) {
+               /* Page not found in guest PTE entries */
+               vcpu->arch.dear = vcpu->arch.fault_dear;
+               to_970(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+               vcpu->arch.msr |= (vcpu->arch.shadow_msr &
+                                  0x00000000f8000000ULL);
+               kvmppc_970_queue_irqprio(vcpu, vec);
+       } else if (page_found == -EPERM) {
+               /* Storage protection */
+               vcpu->arch.dear = vcpu->arch.fault_dear;
+               to_970(vcpu)->dsisr = vcpu->arch.fault_dsisr & ~DSISR_NOHPTE;
+               to_970(vcpu)->dsisr |= DSISR_PROTFAULT;
+               vcpu->arch.msr |= (vcpu->arch.shadow_msr &
+                                  0x00000000f8000000ULL);
+               kvmppc_970_queue_irqprio(vcpu, vec);
+       } else if (page_found == -EINVAL) {
+               /* Page not found in guest SLB */
+               vcpu->arch.dear = vcpu->arch.fault_dear;
+               kvmppc_970_queue_irqprio(vcpu, vec + 0x80);
+       } else if (!is_mmio &&
+                  kvmppc_visible_gfn(vcpu, pte.raddr >> PAGE_SHIFT)) {
+               /* The guest's PTE is not mapped yet. Map on the host */
+               kvmppc_mmu_map_page(vcpu, &pte);
+               if (data)
+                       vcpu->stat.sp_storage++;
+               else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+                       (!(vcpu->arch.hflags & PPC970_HFLAG_DCBZ32)))
+                       kvmppc_patch_dcbz(vcpu, &pte);
+       } else {
+               /* MMIO */
+               vcpu->stat.mmio_exits++;
+               vcpu->arch.paddr_accessed = pte.raddr;
+               vcpu->arch.last_mmio_raddr = pte.raddr;
+               vcpu->arch.last_mmio = pte.vpage;
+               r = kvmppc_emulate_mmio(run, vcpu);
+               if ( r == RESUME_HOST_NV )
+                       r = RESUME_HOST;
+               if ( r == RESUME_GUEST_NV )
+                       r = RESUME_GUEST;
+       }
+
+       return r;
+}
+
+#ifdef EXIT_DEBUG
+/* Debug-only helper (built with EXIT_DEBUG): returns vcpu->arch.dec minus the
+ * difference between the current mftb() value and vcpu->arch.dec_jiffies,
+ * truncated to 32 bits. */
+static u32 kvmppc_get_dec(struct kvm_vcpu *vcpu)
+{
+       u64 jd = mftb() - vcpu->arch.dec_jiffies;
+       return vcpu->arch.dec - jd;
+}
+#endif
+
+/*
+ * Central guest-exit handler.
+ *
+ * exit_nr is the interrupt vector that caused the exit. Interrupts are
+ * enabled for the duration of the handler and disabled again before
+ * returning. Each exit is either handled here (page faults, segment faults,
+ * privileged instruction emulation) or reflected to the guest by queueing the
+ * corresponding interrupt. An unknown exit_nr hits BUG().
+ *
+ * Unless we already exit to the host, a pending signal turns the result into
+ * -EINTR with KVM_EXIT_INTR; otherwise queued interrupts are delivered before
+ * going back into the guest.
+ *
+ * Returns a RESUME_* code or -EINTR.
+ */
+int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+                       unsigned int exit_nr)
+{
+       int r = RESUME_HOST;
+
+       vcpu->stat.sum_exits++;
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST_END);
+       local_irq_enable();
+
+       run->exit_reason = KVM_EXIT_UNKNOWN;
+       run->ready_for_interrupt_injection = 1;
+#ifdef EXIT_DEBUG
+       printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | dec=0x%x | msr=0x%lx\n",
+               exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
+               kvmppc_get_dec(vcpu), vcpu->arch.msr);
+#elif defined (EXIT_DEBUG_SIMPLE)
+       if ((exit_nr != 0x900) && (exit_nr != 0x500))
+               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | dar=0x%lx | msr=0x%lx\n",
+                       exit_nr, vcpu->arch.pc, vcpu->arch.fault_dear,
+                       vcpu->arch.msr);
+#endif
+       kvm_resched(vcpu);
+       switch (exit_nr) {
+       case PPC970_INTERRUPT_INST_STORAGE:
+               vcpu->stat.pf_instruc++;
+               /* only care about PTEG not found errors, but leave NX alone */
+               if (vcpu->arch.shadow_msr & 0x40000000) {
+                       r = kvmppc_handle_pagefault(run, vcpu, vcpu->arch.pc,
+                                                   exit_nr);
+                       vcpu->stat.sp_instruc++;
+               } else if (vcpu->arch.mmu.is_dcbz32(vcpu) &&
+                         (!(vcpu->arch.hflags & PPC970_HFLAG_DCBZ32))) {
+                       /*
+                        * XXX If we do the dcbz hack we use the NX bit to
+                        *     flush&patch the page, so we can't use the NX
+                        *     bit inside the guest. Let's cross our fingers,
+                        *     that no guest that needs the dcbz hack does NX.
+                        */
+                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+                       /* Handled internally: retry in the guest instead of
+                        * leaking a KVM_EXIT_UNKNOWN to userspace */
+                       r = RESUME_GUEST;
+               } else {
+                       vcpu->arch.msr |= (vcpu->arch.shadow_msr & 0x58000000);
+                       kvmppc_970_queue_irqprio(vcpu, exit_nr);
+                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.pc, ~0xFFFULL);
+                       r = RESUME_GUEST;
+               }
+               break;
+       case PPC970_INTERRUPT_DATA_STORAGE:
+               vcpu->stat.pf_storage++;
+               /* The only case we need to handle is missing shadow PTEs */
+               if (vcpu->arch.fault_dsisr & DSISR_NOHPTE) {
+                       r = kvmppc_handle_pagefault(run, vcpu,
+                                                   vcpu->arch.fault_dear,
+                                                   exit_nr);
+               } else {
+                       vcpu->arch.dear = vcpu->arch.fault_dear;
+                       to_970(vcpu)->dsisr = vcpu->arch.fault_dsisr;
+                       kvmppc_970_queue_irqprio(vcpu, exit_nr);
+                       kvmppc_mmu_pte_flush(vcpu, vcpu->arch.dear, ~0xFFFULL);
+                       r = RESUME_GUEST;
+               }
+               break;
+       case PPC970_INTERRUPT_DATA_SEGMENT:
+               local_irq_disable();
+               kvmppc_mmu_map_segment(vcpu, vcpu->arch.fault_dear);
+               local_irq_enable();
+               r = RESUME_GUEST;
+               break;
+       case PPC970_INTERRUPT_INST_SEGMENT:
+               local_irq_disable();
+               kvmppc_mmu_map_segment(vcpu, vcpu->arch.pc);
+               local_irq_enable();
+               r = RESUME_GUEST;
+               break;
+       /* We're good on these - the host merely wanted to get our attention */
+       case PPC970_INTERRUPT_DECREMENTER:
+               vcpu->stat.dec_exits++;
+               r = RESUME_GUEST;
+               break;
+       case PPC970_INTERRUPT_EXTERNAL:
+               vcpu->stat.ext_intr_exits++;
+               r = RESUME_GUEST;
+               break;
+       case PPC970_INTERRUPT_PROGRAM:
+       {
+               enum emulation_result er;
+
+               /* From guest user mode only the patched dcbz encoding is
+                * emulated; everything else is reflected to the guest */
+               if (vcpu->arch.msr & MSR_PR) {
+#ifdef EXIT_DEBUG
+                       printk(KERN_INFO "Userspace triggered 0x700 exception at 0x%lx (0x%x)\n", vcpu->arch.pc, vcpu->arch.last_inst);
+#endif
+                       if ((vcpu->arch.last_inst & 0xff0007ff) !=
+                           (INS_DCBZ & 0xfffffff7)) {
+                               kvmppc_970_queue_irqprio(vcpu, exit_nr);
+                               r = RESUME_GUEST;
+                               break;
+                       }
+               }
+
+               vcpu->stat.emulated_inst_exits++;
+               er = kvmppc_emulate_instruction(run, vcpu);
+               switch (er) {
+               case EMULATE_DONE:
+                       r = RESUME_GUEST;
+                       break;
+               case EMULATE_FAIL:
+                       printk(KERN_CRIT "%s: emulation at %lx failed (%08x)\n",
+                              __func__, vcpu->arch.pc, vcpu->arch.last_inst);
+                       kvmppc_970_queue_irqprio(vcpu, exit_nr);
+                       r = RESUME_GUEST;
+                       break;
+               default:
+                       BUG();
+               }
+               break;
+       }
+       case PPC970_INTERRUPT_SYSCALL:
+#ifdef EXIT_DEBUG
+               printk(KERN_INFO "Syscall Nr %d\n", (int)vcpu->arch.gpr[0]);
+#endif
+               vcpu->stat.syscall_exits++;
+               kvmppc_970_queue_irqprio(vcpu, exit_nr);
+               r = RESUME_GUEST;
+               break;
+       case PPC970_INTERRUPT_MACHINE_CHECK:
+       case PPC970_INTERRUPT_FP_UNAVAIL:
+       case PPC970_INTERRUPT_TRACE:
+       case PPC970_INTERRUPT_ALTIVEC:
+       case PPC970_INTERRUPT_VSX:
+               kvmppc_970_queue_irqprio(vcpu, exit_nr);
+               r = RESUME_GUEST;
+               break;
+       default:
+               /* Ugh - bork here! What did we get? */
+               printk(KERN_EMERG "exit_nr=0x%x | pc=0x%lx | msr=0x%lx\n", exit_nr, vcpu->arch.pc, vcpu->arch.shadow_msr);
+               r = RESUME_HOST;
+               BUG();
+               break;
+       }
+
+
+       if (!(r & RESUME_HOST)) {
+               /* To avoid clobbering exit_reason, only check for signals if
+                * we aren't already exiting to userspace for some other
+                * reason. */
+               if (signal_pending(current)) {
+#ifdef EXIT_DEBUG
+                       printk(KERN_EMERG "KVM: Going back to host\n");
+#endif
+                       vcpu->stat.signal_exits++;
+                       run->exit_reason = KVM_EXIT_INTR;
+                       r = -EINTR;
+               } else {
+                       /* In case an interrupt came in that was triggered
+                        * from userspace (like DEC), we need to check what
+                        * to inject now! */
+                       kvmppc_core_deliver_interrupts(vcpu);
+               }
+       }
+
+#ifdef EXIT_DEBUG
+       printk(KERN_EMERG "KVM exit: vcpu=0x%p pc=0x%lx r=0x%x\n", vcpu, vcpu->arch.pc, r);
+#endif
+
+       if (r == RESUME_GUEST)
+               kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+
+       local_irq_disable();
+
+       return r;
+}
+
+/* No per-vcpu setup is done here; always returns 0. */
+int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+
+/*
+ * Copy the vcpu's register state into the userspace-visible kvm_regs.
+ *
+ * Every SPRG is copied to the field with the same number: sprg4..sprg7 used
+ * to be shifted by one, which left regs->sprg4 unset and never reported the
+ * vcpu's sprg7.
+ *
+ * Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       int i;
+
+       regs->pc = vcpu->arch.pc;
+       regs->cr = vcpu->arch.cr;
+       regs->ctr = vcpu->arch.ctr;
+       regs->lr = vcpu->arch.lr;
+       regs->xer = vcpu->arch.xer;
+       regs->msr = vcpu->arch.msr;
+       regs->srr0 = vcpu->arch.srr0;
+       regs->srr1 = vcpu->arch.srr1;
+       regs->pid = vcpu->arch.pid;
+       regs->sprg0 = vcpu->arch.sprg0;
+       regs->sprg1 = vcpu->arch.sprg1;
+       regs->sprg2 = vcpu->arch.sprg2;
+       regs->sprg3 = vcpu->arch.sprg3;
+       regs->sprg4 = vcpu->arch.sprg4;
+       regs->sprg5 = vcpu->arch.sprg5;
+       regs->sprg6 = vcpu->arch.sprg6;
+       regs->sprg7 = vcpu->arch.sprg7;
+
+       for (i = 0; i < ARRAY_SIZE(regs->gpr); i++)
+               regs->gpr[i] = vcpu->arch.gpr[i];
+
+       return 0;
+}
+
+/*
+ * Load the vcpu's register state from the userspace-supplied kvm_regs.
+ *
+ * The MSR goes through kvmppc_set_msr() so that masking, the shadow MSR and
+ * context reloads are handled. Every SPRG is taken from the field with the
+ * same number: sprg4..sprg7 used to be shifted by one, which left the vcpu's
+ * sprg4 untouched and ignored regs->sprg7.
+ *
+ * Always returns 0.
+ */
+int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+       int i;
+
+       vcpu->arch.pc = regs->pc;
+       vcpu->arch.cr = regs->cr;
+       vcpu->arch.ctr = regs->ctr;
+       vcpu->arch.lr = regs->lr;
+       vcpu->arch.xer = regs->xer;
+       kvmppc_set_msr(vcpu, regs->msr);
+       vcpu->arch.srr0 = regs->srr0;
+       vcpu->arch.srr1 = regs->srr1;
+       vcpu->arch.sprg0 = regs->sprg0;
+       vcpu->arch.sprg1 = regs->sprg1;
+       vcpu->arch.sprg2 = regs->sprg2;
+       vcpu->arch.sprg3 = regs->sprg3;
+       vcpu->arch.sprg4 = regs->sprg4;
+       vcpu->arch.sprg5 = regs->sprg5;
+       vcpu->arch.sprg6 = regs->sprg6;
+       vcpu->arch.sprg7 = regs->sprg7;
+
+       for (i = 0; i < ARRAY_SIZE(vcpu->arch.gpr); i++)
+               vcpu->arch.gpr[i] = regs->gpr[i];
+
+       return 0;
+}
+
+/* Report the special registers; only the PVR is filled in. Returns 0. */
+int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+       sregs->pvr = vcpu->arch.pvr;
+       return 0;
+}
+
+/* Apply the special registers; only the PVR is consumed, via
+ * kvmppc_set_pvr(). Returns 0. */
+int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
+                                  struct kvm_sregs *sregs)
+{
+       kvmppc_set_pvr(vcpu, sregs->pvr);
+       return 0;
+}
+
+/* Reading FPU state is not implemented; always returns -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+       return -ENOTSUPP;
+}
+
+/* Writing FPU state is not implemented; always returns -ENOTSUPP. */
+int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+       return -ENOTSUPP;
+}
+
+/* Address translation ioctl stub: leaves tr untouched and returns 0. */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+                                  struct kvm_translation *tr)
+{
+       return 0;
+}
+
+/* No compatibility check is performed; always returns 0. */
+int kvmppc_core_check_processor_compat(void)
+{
+       return 0;
+}
+
+/*
+ * Allocate and initialise a vcpu.
+ *
+ * The containing kvmppc_vcpu_970 is allocated zeroed from whole pages. The
+ * vcpu defaults to a 970fx PVR with 64 SLB entries and gets its own guest mm
+ * whose context id determines the VSID range (vsid_first..vsid_max).
+ *
+ * Returns the new vcpu, or an ERR_PTR() on failure. On failure the vcpu is
+ * uninitialised again (if it got that far) and its pages are released.
+ */
+struct kvm_vcpu *kvmppc_core_vcpu_create(struct kvm *kvm, unsigned int id)
+{
+       struct kvmppc_vcpu_970 *vcpu_970;
+       struct kvm_vcpu *vcpu;
+       int err;
+
+       vcpu_970 = (struct kvmppc_vcpu_970 *)__get_free_pages(
+                       GFP_KERNEL | __GFP_ZERO,
+                       get_order(sizeof(struct kvmppc_vcpu_970)));
+       if (!vcpu_970) {
+               err = -ENOMEM;
+               goto out;
+       }
+
+       vcpu = &vcpu_970->vcpu;
+       err = kvm_vcpu_init(vcpu, kvm, id);
+       if (err)
+               goto free_vcpu;
+
+       vcpu->arch.host_retip = kvm_return_point;
+       vcpu->arch.host_msr = mfmsr();
+       /* default to 970fx */
+       vcpu->arch.pvr = 0x3C0301;
+       kvmppc_set_pvr(vcpu, vcpu->arch.pvr);
+       vcpu_970->slb_nr = 64;
+
+       /* remember where some real-mode handlers are */
+       vcpu->arch.trampoline_lowmem = kvmppc_trampoline_lowmem;
+       vcpu->arch.trampoline_enter = kvmppc_trampoline_enter;
+       vcpu->arch.highmem_handler = (ulong)kvmppc_handler_highmem;
+
+       vcpu->arch.shadow_msr = MSR_USER64;
+
+       // XXX move mm_guest to to_970()
+       vcpu->arch.mm_guest = mm_alloc();
+       if (!vcpu->arch.mm_guest) {
+               err = -ENOMEM;
+               goto uninit_vcpu;
+       }
+
+       err = init_new_context(current, vcpu->arch.mm_guest);
+       if (err) {
+               /* XXX mm_guest is not released here (nor in
+                * kvmppc_core_vcpu_free()); still to be done. */
+               goto uninit_vcpu;
+       }
+
+       to_970(vcpu)->vsid_max = ((vcpu->arch.mm_guest->context.id + 1) <<
+                                 USER_ESID_BITS) - 1;
+       to_970(vcpu)->vsid_first = vcpu->arch.mm_guest->context.id <<
+                                  USER_ESID_BITS;
+       to_970(vcpu)->vsid_next = to_970(vcpu)->vsid_first;
+
+       return vcpu;
+
+uninit_vcpu:
+       kvm_vcpu_uninit(vcpu);
+free_vcpu:
+       /* free_pages() takes the address of the allocation, not a gfp mask */
+       free_pages((unsigned long)vcpu_970,
+                  get_order(sizeof(struct kvmppc_vcpu_970)));
+out:
+       return ERR_PTR(err);
+}
+
+/* Tear down a vcpu: uninitialise it, then release the pages that hold its
+ * kvmppc_vcpu_970 container. */
+void kvmppc_core_vcpu_free(struct kvm_vcpu *vcpu)
+{
+       struct kvmppc_vcpu_970 *vcpu_970 = to_970(vcpu);
+       unsigned int order = get_order(sizeof(*vcpu_970));
+
+       kvm_vcpu_uninit(vcpu);
+       free_pages((long)vcpu_970, order);
+       // XXX free mm contextes
+}
+
+extern int __kvmppc_vcpu_entry(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu);
+
+/*
+ * Enter the guest via __kvmppc_vcpu_entry() and return its result.
+ *
+ * If a signal is already pending the guest is not entered at all; the run
+ * structure is marked KVM_EXIT_INTR and -EINTR is returned. Otherwise the
+ * host mm is recorded and the context is switched to CONTEXT_GUEST for the
+ * entry and back to CONTEXT_HOST afterwards.
+ */
+int __kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+{
+       int exit_code;
+
+       /* No need to go into the guest when all we do is going out */
+       if (signal_pending(current)) {
+               kvm_run->exit_reason = KVM_EXIT_INTR;
+               return -EINTR;
+       }
+
+       /*
+        * Clear the SLB so that only kernel entries remain
+        * and all user entries are safe to use for the guest
+        */
+       vcpu->arch.mm_host = current->mm;
+       kvmppc_switch_context(vcpu, CONTEXT_GUEST);
+
+       exit_code = __kvmppc_vcpu_entry(kvm_run, vcpu);
+
+       kvmppc_switch_context(vcpu, CONTEXT_HOST);
+
+       return exit_code;
+}
+
+/* Module init: registers with KVM via kvm_init(), passing
+ * sizeof(struct kvmppc_vcpu_970) as the vcpu size, and returns its result. */
+static int kvmppc_970_init(void)
+{
+       return kvm_init(NULL, sizeof(struct kvmppc_vcpu_970), THIS_MODULE);
+}
+
+/* Module exit: calls kvm_exit(). */
+static void kvmppc_970_exit(void)
+{
+       kvm_exit();
+}
+
+/* Register the module's load and unload entry points */
+module_init(kvmppc_970_init);
+module_exit(kvmppc_970_exit);
-- 
1.6.0.2

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/23] Add 970.c

Reply via email to