[PATCH v5 0/8] In-kernel XICS interrupt controller emulation
This is a repost of my patch series implementing in-kernel emulation of the XICS interrupt controller architecture defined in PAPR (Power Architecture Platform Requirements, the document that defines IBM's pSeries platform architecture). This version of the patch series uses the latest device API as posted by Scott Wood, that is, the version where the core device code provides the file descriptor and ioctl handler. I have structured the series so that the API is added by the last two patches, so as to be able to accommodate any future revisions to the device API with minimal changes. The series is based on Alex Graf's kvm-ppc-queue branch with Scott Wood's recent patch series applied on top, together with the patch below to allow it to build with CONFIG_KVM_MPIC=n. The API defined here uses KVM_CREATE_DEVICE to create the XICS, KVM_DEVICE_SET_ATTR/KVM_DEVICE_GET_ATTR to manipulate the interrupt sources (for initialization and migration), a new KVM_CAP_IRQ_XICS capability to connect vcpus to the XICS, a new identifier KVM_REG_PPC_ICP_STATE for the one-reg interface to get and set per-vcpu state, and the existing KVM_IRQ_LINE ioctl to assert and deassert interrupt sources. This version also cleans up some checkpatch.pl errors and clarifies the lifetime rules for the various objects. There are two checkpatch warnings for long lines, but they are long because they have long strings in them, and if I break the strings over two lines then checkpatch warns about that. Paul. 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1d3888b..bd41eea 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -466,9 +466,11 @@ void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu) kvmppc_remove_vcpu_debugfs(vcpu); switch (vcpu->arch.irq_type) { +#ifdef CONFIG_KVM_MPIC case KVMPPC_IRQ_MPIC: kvmppc_mpic_disconnect_vcpu(vcpu->arch.mpic, vcpu); break; +#endif } kvmppc_core_vcpu_free(vcpu); -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/8] KVM: PPC: Book3S: Facilities to save/restore XICS presentation ctrler state
This adds the ability for userspace to save and restore the state of the XICS interrupt presentation controllers (ICPs) via the KVM_GET/SET_ONE_REG interface. Since there is one ICP per vcpu, we simply define a new 64-bit register in the ONE_REG space for the ICP state. The state includes the CPU priority setting, the pending IPI priority, and the priority and source number of any pending external interrupt. Signed-off-by: Paul Mackerras pau...@samba.org --- Documentation/virtual/kvm/api.txt |1 + arch/powerpc/include/asm/kvm_ppc.h |2 + arch/powerpc/include/uapi/asm/kvm.h | 12 + arch/powerpc/kvm/book3s.c | 19 arch/powerpc/kvm/book3s_xics.c | 90 +++ 5 files changed, 124 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4247d65..54bb6ad 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1792,6 +1792,7 @@ registers, find a list below: PPC | KVM_REG_PPC_TSR | 32 PPC | KVM_REG_PPC_OR_TSR | 32 PPC | KVM_REG_PPC_CLEAR_TSR| 32 + PPC | KVM_REG_PPC_ICP_STATE | 64 ARM registers are mapped using the lower 32 bits. 
The upper 16 of that is the register group type, or coprocessor number: diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 962fcc6..44c8ea1 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -313,6 +313,8 @@ extern void kvmppc_xics_free_icp(struct kvm_vcpu *vcpu); extern int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu, unsigned long server); extern int kvm_vm_ioctl_xics_irq(struct kvm *kvm, struct kvm_irq_level *args); extern int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 cmd); +extern u64 kvmppc_xics_get_icp(struct kvm_vcpu *vcpu); +extern int kvmppc_xics_set_icp(struct kvm_vcpu *vcpu, u64 icpval); #else static inline int kvmppc_xics_enabled(struct kvm_vcpu *vcpu) { return 0; } diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 18be86c..1388282 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -432,6 +432,18 @@ struct kvm_get_htab_header { /* Debugging: Special instruction for software breakpoint */ #define KVM_REG_PPC_DEBUG_INST (KVM_REG_PPC | KVM_REG_SIZE_U32 | 0x8b) +/* Per-vcpu XICS interrupt controller state */ +#define KVM_REG_PPC_ICP_STATE (KVM_REG_PPC | KVM_REG_SIZE_U64 | 0x8c) + +#define KVM_REG_PPC_ICP_CPPR_SHIFT56 /* current proc priority */ +#define KVM_REG_PPC_ICP_CPPR_MASK 0xff +#define KVM_REG_PPC_ICP_XISR_SHIFT32 /* interrupt status field */ +#define KVM_REG_PPC_ICP_XISR_MASK 0xff +#define KVM_REG_PPC_ICP_MFRR_SHIFT24 /* pending IPI priority */ +#define KVM_REG_PPC_ICP_MFRR_MASK 0xff +#define KVM_REG_PPC_ICP_PPRI_SHIFT16 /* pending irq priority */ +#define KVM_REG_PPC_ICP_PPRI_MASK 0xff + /* Device control API: PPC-specific devices */ #define KVM_DEV_MPIC_GRP_MISC 1 #define KVM_DEV_MPIC_BASE_ADDR 0 /* 64-bit */ diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index 10cd398..185c472 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -535,6 
+535,15 @@ int kvm_vcpu_ioctl_get_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) opcode, sizeof(u32)); break; } +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu-arch.icp) { + r = -ENXIO; + break; + } + val = get_reg_val(reg-id, kvmppc_xics_get_icp(vcpu)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; @@ -597,6 +606,16 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, struct kvm_one_reg *reg) vcpu-arch.vscr.u[3] = set_reg_val(reg-id, val); break; #endif /* CONFIG_ALTIVEC */ +#ifdef CONFIG_KVM_XICS + case KVM_REG_PPC_ICP_STATE: + if (!vcpu-arch.icp) { + r = -ENXIO; + break; + } + r = kvmppc_xics_set_icp(vcpu, + set_reg_val(reg-id, val)); + break; +#endif /* CONFIG_KVM_XICS */ default: r = -EINVAL; break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 9fb2d39..ee841ed 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -954,6 +954,96 @@ int kvmppc_xics_create_icp(struct kvm_vcpu *vcpu,
[PATCH 5/8] KVM: PPC: Book3S HV: Improve real-mode handling of external interrupts
This streamlines our handling of external interrupts that come in while we're in the guest. First, when waking up a hardware thread that was napping, we split off the napping due to H_CEDE case earlier, and use the code that handles an external interrupt (0x500) in the guest to handle that too. Secondly, the code that handles those external interrupts now checks if any other thread is exiting to the host before bouncing an external interrupt to the guest, and also checks that there is actually an external interrupt pending for the guest before setting the LPCR MER bit (mediated external request). This also makes sure that we clear the ceded flag when we handle a wakeup from cede in real mode, and fixes a potential infinite loop in kvmppc_run_vcpu() which can occur if we ever end up with the ceded flag set but MSR[EE] off. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/reg.h |1 + arch/powerpc/kvm/book3s_hv.c|5 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 138 +-- 3 files changed, 80 insertions(+), 64 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index c9c67fc..7993224 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -290,6 +290,7 @@ #define LPCR_PECE1 0x2000 /* decrementer can cause exit */ #define LPCR_PECE2 0x1000 /* machine check etc can cause exit */ #define LPCR_MER 0x0800 /* Mediated External Exception */ +#define LPCR_MER_SH 11 #define LPCR_LPES0x000c #define LPCR_LPES0 0x0008 /* LPAR Env selector 0 */ #define LPCR_LPES1 0x0004 /* LPAR Env selector 1 */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 0423c81..5e87599 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -1376,9 +1376,12 @@ static int kvmppc_run_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu) break; vc-runner = vcpu; n_ceded = 0; - list_for_each_entry(v, vc-runnable_threads, arch.run_list) + list_for_each_entry(v, 
vc-runnable_threads, arch.run_list) { if (!v-arch.pending_exceptions) n_ceded += v-arch.ceded; + else + v-arch.ceded = 0; + } if (n_ceded == vc-n_runnable) kvmppc_vcore_blocked(vc); else diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index c35ae06..73ec955 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -97,50 +97,51 @@ kvm_start_guest: li r0,1 stb r0,PACA_NAPSTATELOST(r13) - /* get vcpu pointer, NULL if we have no vcpu to run */ - ld r4,HSTATE_KVM_VCPU(r13) - cmpdi cr1,r4,0 + /* were we napping due to cede? */ + lbz r0,HSTATE_NAPPING(r13) + cmpwi r0,0 + bne kvm_end_cede + + /* +* We weren't napping due to cede, so this must be a secondary +* thread being woken up to run a guest, or being woken up due +* to a stray IPI. (Or due to some machine check or hypervisor +* maintenance interrupt while the core is in KVM.) +*/ /* Check the wake reason in SRR1 to see why we got here */ mfspr r3,SPRN_SRR1 rlwinm r3,r3,44-31,0x7 /* extract wake reason field */ cmpwi r3,4/* was it an external interrupt? */ - bne 27f - - /* -* External interrupt - for now assume it is an IPI, since we -* should never get any other interrupts sent to offline threads. -* Only do this for secondary threads. -*/ - beq cr1,25f - lwz r3,VCPU_PTID(r4) - cmpwi r3,0 - beq 27f -25:ld r5,HSTATE_XICS_PHYS(r13) - li r0,0xff - li r6,XICS_MFRR - li r7,XICS_XIRR + bne 27f /* if not */ + ld r5,HSTATE_XICS_PHYS(r13) + li r7,XICS_XIRR/* if it was an external interrupt, */ lwzcix r8,r5,r7/* get and ack the interrupt */ sync clrldi. r9,r8,40/* get interrupt source ID. */ - beq 27f /* none there? */ - cmpwi r9,XICS_IPI - bne 26f + beq 28f /* none there? */ + cmpwi r9,XICS_IPI /* was it an IPI? */ + bne 29f + li r0,0xff + li r6,XICS_MFRR stbcix r0,r5,r6/* clear IPI */ -26:stwcix r8,r5,r7/* EOI the interrupt */ + stwcix r8,r5,r7/* EOI the interrupt */ + sync
[PATCH 2/8] KVM: PPC: Book3S: Add kernel emulation for the XICS interrupt controller
From: Benjamin Herrenschmidt b...@kernel.crashing.org This adds in-kernel emulation of the XICS (eXternal Interrupt Controller Specification) interrupt controller specified by PAPR, for both HV and PR KVM guests. The XICS emulation supports up to 1048560 interrupt sources. Interrupt source numbers below 16 are reserved; 0 is used to mean no interrupt and 2 is used for IPIs. Internally these are represented in blocks of 1024, called ICS (interrupt controller source) entities, but that is not visible to userspace. Each vcpu gets one ICP (interrupt controller presentation) entity, used to store the per-vcpu state such as vcpu priority, pending interrupt state, IPI request, etc. This does not include any API or any way to connect vcpus to their ICP state; that will be added in later patches. This is based on an initial implementation by Michael Ellerman mich...@ellerman.id.au reworked by Benjamin Herrenschmidt and Paul Mackerras. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s.h |2 + arch/powerpc/include/asm/kvm_host.h | 11 + arch/powerpc/include/asm/kvm_ppc.h| 29 + arch/powerpc/kvm/Kconfig |8 + arch/powerpc/kvm/Makefile |3 + arch/powerpc/kvm/book3s.c |2 +- arch/powerpc/kvm/book3s_hv.c |9 + arch/powerpc/kvm/book3s_pr_papr.c | 14 + arch/powerpc/kvm/book3s_rtas.c| 54 +- arch/powerpc/kvm/book3s_xics.c| 946 + arch/powerpc/kvm/book3s_xics.h| 113 arch/powerpc/kvm/powerpc.c|3 + 12 files changed, 1192 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/kvm/book3s_xics.c create mode 100644 arch/powerpc/kvm/book3s_xics.h diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index bc81842..0d9b32c 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -142,6 +142,8 @@ extern int kvmppc_mmu_hv_init(void); extern int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool 
data); extern int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, bool data); extern void kvmppc_book3s_queue_irqprio(struct kvm_vcpu *vcpu, unsigned int vec); +extern void kvmppc_book3s_dequeue_irqprio(struct kvm_vcpu *vcpu, + unsigned int vec); extern void kvmppc_inject_interrupt(struct kvm_vcpu *vcpu, int vec, u64 flags); extern void kvmppc_set_bat(struct kvm_vcpu *vcpu, struct kvmppc_bat *bat, bool upper, u32 val); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 1650e26..3d34570 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -188,6 +188,10 @@ struct kvmppc_linear_info { int type; }; +/* XICS components, defined in boo3s_xics.c */ +struct kvmppc_xics; +struct kvmppc_icp; + /* * The reverse mapping array has one entry for each HPTE, * which stores the guest's view of the second word of the HPTE @@ -257,6 +261,9 @@ struct kvm_arch { struct list_head spapr_tce_tables; struct list_head rtas_tokens; #endif +#ifdef CONFIG_KVM_XICS + struct kvmppc_xics *xics; +#endif }; /* @@ -378,6 +385,7 @@ struct kvmppc_booke_debug_reg { #define KVMPPC_IRQ_DEFAULT 0 #define KVMPPC_IRQ_MPIC1 +#define KVMPPC_IRQ_XICS2 struct openpic; @@ -563,6 +571,9 @@ struct kvm_vcpu_arch { int irq_type; /* one of KVM_IRQ_* */ int irq_cpu_id; struct openpic *mpic; /* KVM_IRQ_MPIC */ +#ifdef CONFIG_KVM_XICS + struct kvmppc_icp *icp; /* XICS presentation controller */ +#endif #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 948b4c2..7cb8a71 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -130,6 +130,7 @@ extern long kvmppc_prepare_vrma(struct kvm *kvm, extern void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long porder); extern int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu); + extern long 
kvm_vm_ioctl_create_spapr_tce(struct kvm *kvm, struct kvm_create_spapr_tce *args); extern long kvmppc_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, @@ -169,6 +170,10 @@ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); extern int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp); extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); extern void kvmppc_rtas_tokens_free(struct kvm *kvm); +extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, +
[PATCH 3/8] KVM: PPC: Book3S HV: Speed up wakeups of CPUs on HV KVM
From: Benjamin Herrenschmidt b...@kernel.crashing.org Currently, we wake up a CPU by sending a host IPI with smp_send_reschedule() to thread 0 of that core, which will take all threads out of the guest, and cause them to re-evaluate their interrupt status on the way back in. This adds a mechanism to differentiate real host IPIs from IPIs sent by KVM for guest threads to poke each other, in order to target the guest threads precisely when possible and avoid that global switch of the core to host state. We then use this new facility in the in-kernel XICS code. Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_asm.h |8 ++- arch/powerpc/include/asm/kvm_ppc.h| 29 arch/powerpc/kernel/asm-offsets.c |2 + arch/powerpc/kvm/book3s_hv.c | 26 +++- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 102 - arch/powerpc/kvm/book3s_xics.c|2 +- arch/powerpc/sysdev/xics/icp-native.c |8 +++ 7 files changed, 158 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_asm.h b/arch/powerpc/include/asm/kvm_book3s_asm.h index cdc3d27..9039d3c 100644 --- a/arch/powerpc/include/asm/kvm_book3s_asm.h +++ b/arch/powerpc/include/asm/kvm_book3s_asm.h @@ -20,6 +20,11 @@ #ifndef __ASM_KVM_BOOK3S_ASM_H__ #define __ASM_KVM_BOOK3S_ASM_H__ +/* XICS ICP register offsets */ +#define XICS_XIRR 4 +#define XICS_MFRR 0xc +#define XICS_IPI 2 /* interrupt source # for IPIs */ + #ifdef __ASSEMBLY__ #ifdef CONFIG_KVM_BOOK3S_HANDLER @@ -81,10 +86,11 @@ struct kvmppc_host_state { #ifdef CONFIG_KVM_BOOK3S_64_HV u8 hwthread_req; u8 hwthread_state; - + u8 host_ipi; struct kvm_vcpu *kvm_vcpu; struct kvmppc_vcore *kvm_vcore; unsigned long xics_phys; + u32 saved_xirr; u64 dabr; u64 host_mmcr[3]; u32 host_pmc[8]; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 7cb8a71..95a44a6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ 
-264,6 +264,21 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) paca[cpu].kvm_hstate.xics_phys = addr; } +static inline u32 kvmppc_get_xics_latch(void) +{ + u32 xirr = get_paca()-kvm_hstate.saved_xirr; + + get_paca()-kvm_hstate.saved_xirr = 0; + + return xirr; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{ + paca[cpu].kvm_hstate.host_ipi = host_ipi; +} + +extern void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu); extern void kvm_linear_init(void); #else @@ -273,6 +288,18 @@ static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) static inline void kvm_linear_init(void) {} +static inline u32 kvmppc_get_xics_latch(void) +{ + return 0; +} + +static inline void kvmppc_set_host_ipi(int cpu, u8 host_ipi) +{} + +static inline void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ + kvm_vcpu_kick(vcpu); +} #endif #ifdef CONFIG_KVM_XICS @@ -365,4 +392,6 @@ static inline ulong kvmppc_get_ea_indexed(struct kvm_vcpu *vcpu, int ra, int rb) return ea; } +extern void xics_wake_cpu(int cpu); + #endif /* __POWERPC_KVM_PPC_H__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d87c908..75e31ac 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -573,6 +573,8 @@ int main(void) HSTATE_FIELD(HSTATE_KVM_VCPU, kvm_vcpu); HSTATE_FIELD(HSTATE_KVM_VCORE, kvm_vcore); HSTATE_FIELD(HSTATE_XICS_PHYS, xics_phys); + HSTATE_FIELD(HSTATE_SAVED_XIRR, saved_xirr); + HSTATE_FIELD(HSTATE_HOST_IPI, host_ipi); HSTATE_FIELD(HSTATE_MMCR, host_mmcr); HSTATE_FIELD(HSTATE_PMC, host_pmc); HSTATE_FIELD(HSTATE_PURR, host_purr); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 506b5ea..0423c81 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -66,6 +66,31 @@ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); +void kvmppc_fast_vcpu_kick(struct kvm_vcpu *vcpu) +{ 
+ int me; + int cpu = vcpu-cpu; + wait_queue_head_t *wqp; + + wqp = kvm_arch_vcpu_wq(vcpu); + if (waitqueue_active(wqp)) { + wake_up_interruptible(wqp); + ++vcpu-stat.halt_wakeup; + } + + me = get_cpu(); + + /* CPU points to the first thread of the core */ + if (cpu != me cpu = 0 cpu nr_cpu_ids) { + int real_cpu = cpu + vcpu-arch.ptid; + if (paca[real_cpu].kvm_hstate.xics_phys) + xics_wake_cpu(real_cpu); + else if (cpu_online(cpu)) +
[PATCH 6/8] KVM: PPC: Book3S: Add support for ibm,int-on/off RTAS calls
This adds support for the ibm,int-on and ibm,int-off RTAS calls to the in-kernel XICS emulation and corrects the handling of the saved priority by the ibm,set-xive RTAS call. With this, ibm,int-off sets the specified interrupt's priority in its saved_priority field and sets the priority to 0xff (the least favoured value). ibm,int-on restores the saved_priority to the priority field, and ibm,set-xive sets both the priority and the saved_priority to the specified priority value. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_ppc.h |2 + arch/powerpc/kvm/book3s_rtas.c | 40 + arch/powerpc/kvm/book3s_xics.c | 84 ++-- arch/powerpc/kvm/book3s_xics.h |2 +- 4 files changed, 113 insertions(+), 15 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 95a44a6..962fcc6 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -174,6 +174,8 @@ extern int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority); extern int kvmppc_xics_get_xive(struct kvm *kvm, u32 irq, u32 *server, u32 *priority); +extern int kvmppc_xics_int_on(struct kvm *kvm, u32 irq); +extern int kvmppc_xics_int_off(struct kvm *kvm, u32 irq); /* * Cuts out inst bits with ordering according to spec. 
diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index 44d652a..8e61156 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -63,6 +63,44 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) out: args-rets[0] = rc; } + +static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args-nargs != 1 || args-nret != 1) { + rc = -3; + goto out; + } + + irq = args-args[0]; + + rc = kvmppc_xics_int_off(vcpu-kvm, irq); + if (rc) + rc = -3; +out: + args-rets[0] = rc; +} + +static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) +{ + u32 irq; + int rc; + + if (args-nargs != 1 || args-nret != 1) { + rc = -3; + goto out; + } + + irq = args-args[0]; + + rc = kvmppc_xics_int_on(vcpu-kvm, irq); + if (rc) + rc = -3; +out: + args-rets[0] = rc; +} #endif /* CONFIG_KVM_XICS */ struct rtas_handler { @@ -74,6 +112,8 @@ static struct rtas_handler rtas_handlers[] = { #ifdef CONFIG_KVM_XICS { .name = ibm,set-xive, .handler = kvm_rtas_set_xive }, { .name = ibm,get-xive, .handler = kvm_rtas_get_xive }, + { .name = ibm,int-off, .handler = kvm_rtas_int_off }, + { .name = ibm,int-on, .handler = kvm_rtas_int_on }, #endif }; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index 7fd247c..9fb2d39 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -123,6 +123,28 @@ static void ics_check_resend(struct kvmppc_xics *xics, struct kvmppc_ics *ics, mutex_unlock(ics-lock); } +static bool write_xive(struct kvmppc_xics *xics, struct kvmppc_ics *ics, + struct ics_irq_state *state, + u32 server, u32 priority, u32 saved_priority) +{ + bool deliver; + + mutex_lock(ics-lock); + + state-server = server; + state-priority = priority; + state-saved_priority = saved_priority; + deliver = false; + if ((state-masked_pending || state-resend) priority != MASKED) { + state-masked_pending = 0; + deliver = true; + 
} + + mutex_unlock(ics-lock); + + return deliver; +} + int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) { struct kvmppc_xics *xics = kvm-arch.xics; @@ -130,7 +152,6 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) struct kvmppc_ics *ics; struct ics_irq_state *state; u16 src; - bool deliver; if (!xics) return -ENODEV; @@ -144,23 +165,11 @@ int kvmppc_xics_set_xive(struct kvm *kvm, u32 irq, u32 server, u32 priority) if (!icp) return -EINVAL; - mutex_lock(ics-lock); - XICS_DBG(set_xive %#x server %#x prio %#x MP:%d RS:%d\n, irq, server, priority, state-masked_pending, state-resend); - state-server = server; - state-priority = priority; - deliver = false; - if ((state-masked_pending || state-resend) priority != MASKED) { - state-masked_pending = 0; - deliver = true; - } - - mutex_unlock(ics-lock); - - if (deliver) + if (write_xive(xics, ics, state, server, priority, priority))
[PATCH 1/8] KVM: PPC: Book3S: Add infrastructure to implement kernel-side RTAS calls
From: Michael Ellerman mich...@ellerman.id.au For pseries machine emulation, in order to move the interrupt controller code to the kernel, we need to intercept some RTAS calls in the kernel itself. This adds an infrastructure to allow in-kernel handlers to be registered for RTAS services by name. A new ioctl, KVM_PPC_RTAS_DEFINE_TOKEN, then allows userspace to associate token values with those service names. Then, when the guest requests an RTAS service with one of those token values, it will be handled by the relevant in-kernel handler rather than being passed up to userspace as at present. Signed-off-by: Michael Ellerman mich...@ellerman.id.au Signed-off-by: Benjamin Herrenschmidt b...@kernel.crashing.org Signed-off-by: Paul Mackerras pau...@samba.org --- Documentation/virtual/kvm/api.txt | 19 arch/powerpc/include/asm/hvcall.h |3 + arch/powerpc/include/asm/kvm_host.h |1 + arch/powerpc/include/asm/kvm_ppc.h |4 + arch/powerpc/include/uapi/asm/kvm.h |6 ++ arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/book3s_hv.c| 18 +++- arch/powerpc/kvm/book3s_pr.c|1 + arch/powerpc/kvm/book3s_pr_papr.c |7 ++ arch/powerpc/kvm/book3s_rtas.c | 182 +++ arch/powerpc/kvm/powerpc.c |8 ++ include/uapi/linux/kvm.h|3 + 12 files changed, 252 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/kvm/book3s_rtas.c diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 4c326ae..4247d65 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2325,6 +2325,25 @@ and distributor interface, the ioctl must be called after calling KVM_CREATE_IRQCHIP, but before calling KVM_RUN on any of the VCPUs. Calling this ioctl twice for any of the base addresses will return -EEXIST. 
+4.82 KVM_PPC_RTAS_DEFINE_TOKEN + +Capability: KVM_CAP_PPC_RTAS +Architectures: ppc +Type: vm ioctl +Parameters: struct kvm_rtas_token_args +Returns: 0 on success, -1 on error + +Defines a token value for a RTAS (Run Time Abstraction Services) +service in order to allow it to be handled in the kernel. The +argument struct gives the name of the service, which must be the name +of a service that has a kernel-side implementation. If the token +value is non-zero, it will be associated with that service, and +subsequent RTAS calls by the guest specifying that token will be +handled by the kernel. If the token value is 0, then any token +associated with the service will be forgotten, and subsequent RTAS +calls by the guest for that service will be passed to userspace to be +handled. + 5. The kvm_run structure diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 4bc2c3d..cf4df8e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -270,6 +270,9 @@ #define H_SET_MODE 0x31C #define MAX_HCALL_OPCODE H_SET_MODE +/* Platform specific hcalls, used by KVM */ +#define H_RTAS 0xf000 + #ifndef __ASSEMBLY__ /** diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 36368c9..1650e26 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -255,6 +255,7 @@ struct kvm_arch { #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; + struct list_head rtas_tokens; #endif }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index b25d475..948b4c2 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -166,6 +166,10 @@ extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); +extern int kvm_vm_ioctl_rtas_define_token(struct kvm 
*kvm, void __user *argp); +extern int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu); +extern void kvmppc_rtas_tokens_free(struct kvm *kvm); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 36be2fe..18be86c 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -299,6 +299,12 @@ struct kvm_allocate_rma { __u64 rma_size; }; +/* for KVM_CAP_PPC_RTAS */ +struct kvm_rtas_token_args { + char name[120]; + __u64 token;/* Use a token of 0 to undefine a mapping */ +}; + struct kvm_book3e_206_tlb_entry { __u32 mas8; __u32 mas1; diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4a2277a..d2c8a88 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -86,6 +86,7 @@ kvm-book3s_64-module-objs := \
[PATCH 03/17] KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing
We have a capability enquiry system that allows user space to ask kvm whether a feature is available. The point behind this system is that we can have different kernel configurations with different capabilities and user space can adjust accordingly. Because features can always be nonexistent, we can drop any #ifdefs on CAP defines that could be used generically, like the irq routing bits. These can be easily reused for non-IOAPIC systems as well. Signed-off-by: Alexander Graf ag...@suse.de --- include/uapi/linux/kvm.h |2 -- 1 files changed, 0 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 74d0ff3..c741902 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -579,9 +579,7 @@ struct kvm_ppc_smmu_info { #ifdef __KVM_HAVE_PIT #define KVM_CAP_REINJECT_CONTROL 24 #endif -#ifdef __KVM_HAVE_IOAPIC #define KVM_CAP_IRQ_ROUTING 25 -#endif #define KVM_CAP_IRQ_INJECT_STATUS 26 #ifdef __KVM_HAVE_DEVICE_ASSIGNMENT #define KVM_CAP_DEVICE_DEASSIGNMENT 27 -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 13/17] kvm/ppc/mpic: in-kernel MPIC emulation
From: Scott Wood scottw...@freescale.com Hook the MPIC code up to the KVM interfaces, add locking, etc. Signed-off-by: Scott Wood scottw...@freescale.com [agraf: add stub function for kvmppc_mpic_set_epr, non-booke, 64bit] Signed-off-by: Alexander Graf ag...@suse.de --- Documentation/virtual/kvm/devices/mpic.txt | 37 ++ arch/powerpc/include/asm/kvm_host.h|8 +- arch/powerpc/include/asm/kvm_ppc.h | 17 + arch/powerpc/include/uapi/asm/kvm.h|7 + arch/powerpc/kvm/Kconfig |9 + arch/powerpc/kvm/Makefile |2 + arch/powerpc/kvm/booke.c |8 +- arch/powerpc/kvm/mpic.c| 762 +--- arch/powerpc/kvm/powerpc.c | 12 +- include/linux/kvm_host.h |2 + include/uapi/linux/kvm.h |3 + virt/kvm/kvm_main.c|6 + 12 files changed, 673 insertions(+), 200 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/mpic.txt diff --git a/Documentation/virtual/kvm/devices/mpic.txt b/Documentation/virtual/kvm/devices/mpic.txt new file mode 100644 index 000..ce98e32 --- /dev/null +++ b/Documentation/virtual/kvm/devices/mpic.txt @@ -0,0 +1,37 @@ +MPIC interrupt controller += + +Device types supported: + KVM_DEV_TYPE_FSL_MPIC_20 Freescale MPIC v2.0 + KVM_DEV_TYPE_FSL_MPIC_42 Freescale MPIC v4.2 + +Only one MPIC instance, of any type, may be instantiated. The created +MPIC will act as the system interrupt controller, connecting to each +vcpu's interrupt inputs. + +Groups: + KVM_DEV_MPIC_GRP_MISC + Attributes: +KVM_DEV_MPIC_BASE_ADDR (rw, 64-bit) + Base address of the 256 KiB MPIC register space. Must be + naturally aligned. A value of zero disables the mapping. + Reset value is zero. + + KVM_DEV_MPIC_GRP_REGISTER (rw, 32-bit) +Access an MPIC register, as if the access were made from the guest. +attr is the byte offset into the MPIC register space. Accesses +must be 4-byte aligned. + +MSIs may be signaled by using this attribute group to write +to the relevant MSIIR. + + KVM_DEV_MPIC_GRP_IRQ_ACTIVE (rw, 32-bit) +IRQ input line for each standard openpic source. 
0 is inactive and 1 +is active, regardless of interrupt sense. + +For edge-triggered interrupts: Writing 1 is considered an activating +edge, and writing 0 is ignored. Reading returns 1 if a previously +signaled edge has not been acknowledged, and 0 otherwise. + +attr is the IRQ number. IRQ numbers for standard sources are the +byte offset of the relevant IVPR from EIVPR0, divided by 32. diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index e34f8fe..7e7aef9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -359,6 +359,11 @@ struct kvmppc_slb { #define KVMPPC_BOOKE_MAX_IAC 4 #define KVMPPC_BOOKE_MAX_DAC 2 +/* KVMPPC_EPR_USER takes precedence over KVMPPC_EPR_KERNEL */ +#define KVMPPC_EPR_NONE0 /* EPR not supported */ +#define KVMPPC_EPR_USER1 /* exit to userspace to fill EPR */ +#define KVMPPC_EPR_KERNEL 2 /* in-kernel irqchip */ + struct kvmppc_booke_debug_reg { u32 dbcr0; u32 dbcr1; @@ -522,7 +527,7 @@ struct kvm_vcpu_arch { u8 sane; u8 cpu_type; u8 hcall_needed; - u8 epr_enabled; + u8 epr_flags; /* KVMPPC_EPR_xxx */ u8 epr_needed; u32 cpr0_cfgaddr; /* holds the last set cpr0_cfgaddr */ @@ -589,5 +594,6 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x0060 #define __KVM_HAVE_ARCH_WQP +#define __KVM_HAVE_CREATE_DEVICE #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f589307..0b86604 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -164,6 +164,8 @@ extern int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu); extern int kvm_vm_ioctl_get_htab_fd(struct kvm *kvm, struct kvm_get_htab_fd *); +int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq); + /* * Cuts out inst bits with ordering according to spec. * That means the leftmost bit is zero. All given bits are included. 
@@ -245,6 +247,9 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); +struct openpic; +void kvmppc_mpic_put(struct openpic *opp); + #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) { @@ -270,6 +275,18 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #endif } +#ifdef CONFIG_KVM_MPIC + +void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); + +#else + +static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) +{ +} + +#endif /*
[PATCH 11/17] kvm/ppc/mpic: remove some obviously unneeded code
From: Scott Wood scottw...@freescale.com Remove some parts of the code that are obviously QEMU or Raven specific before fixing style issues, to reduce the style issues that need to be fixed. Signed-off-by: Scott Wood scottw...@freescale.com Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/mpic.c | 344 --- 1 files changed, 0 insertions(+), 344 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 57655b9..d6d70a4 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -22,39 +22,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ -/* - * - * Based on OpenPic implementations: - * - Intel GW80314 I/O companion chip developer's manual - * - Motorola MPC8245 MPC8540 user manuals. - * - Motorola MCP750 (aka Raven) programmer manual. - * - Motorola Harrier programmer manuel - * - * Serial interrupts, as implemented in Raven chipset are not supported yet. - * - */ -#include hw.h -#include ppc/mac.h -#include pci/pci.h -#include openpic.h -#include sysbus.h -#include pci/msi.h -#include qemu/bitops.h -#include ppc.h - -//#define DEBUG_OPENPIC - -#ifdef DEBUG_OPENPIC -static const int debug_openpic = 1; -#else -static const int debug_openpic = 0; -#endif - -#define DPRINTF(fmt, ...) 
do { \ -if (debug_openpic) { \ -printf(fmt , ## __VA_ARGS__); \ -} \ -} while (0) #define MAX_CPU 32 #define MAX_SRC 256 @@ -82,21 +49,6 @@ static const int debug_openpic = 0; #define OPENPIC_CPU_REG_START0x2 #define OPENPIC_CPU_REG_SIZE 0x100 + ((MAX_CPU - 1) * 0x1000) -/* Raven */ -#define RAVEN_MAX_CPU 2 -#define RAVEN_MAX_EXT 48 -#define RAVEN_MAX_IRQ 64 -#define RAVEN_MAX_TMR MAX_TMR -#define RAVEN_MAX_IPI MAX_IPI - -/* Interrupt definitions */ -#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ -#define RAVEN_ERR_IRQ(RAVEN_MAX_EXT + 1) /* Error IRQ */ -#define RAVEN_TMR_IRQ(RAVEN_MAX_EXT + 2) /* First timer IRQ */ -#define RAVEN_IPI_IRQ(RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ -/* First doorbell IRQ */ -#define RAVEN_DBL_IRQ(RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) - typedef struct FslMpicInfo { int max_ext; } FslMpicInfo; @@ -138,44 +90,6 @@ static FslMpicInfo fsl_mpic_42 = { #define ILR_INTTGT_CINT 0x01 /* critical */ #define ILR_INTTGT_MCP0x02 /* machine check */ -/* The currently supported INTTGT values happen to be the same as QEMU's - * openpic output codes, but don't depend on this. The output codes - * could change (unlikely, but...) or support could be added for - * more INTTGT values. 
- */ -static const int inttgt_output[][2] = { - {ILR_INTTGT_INT, OPENPIC_OUTPUT_INT}, - {ILR_INTTGT_CINT, OPENPIC_OUTPUT_CINT}, - {ILR_INTTGT_MCP, OPENPIC_OUTPUT_MCK}, -}; - -static int inttgt_to_output(int inttgt) -{ - int i; - - for (i = 0; i ARRAY_SIZE(inttgt_output); i++) { - if (inttgt_output[i][0] == inttgt) { - return inttgt_output[i][1]; - } - } - - fprintf(stderr, %s: unsupported inttgt %d\n, __func__, inttgt); - return OPENPIC_OUTPUT_INT; -} - -static int output_to_inttgt(int output) -{ - int i; - - for (i = 0; i ARRAY_SIZE(inttgt_output); i++) { - if (inttgt_output[i][1] == output) { - return inttgt_output[i][0]; - } - } - - abort(); -} - #define MSIIR_OFFSET 0x140 #define MSIIR_SRS_SHIFT29 #define MSIIR_SRS_MASK (0x7 MSIIR_SRS_SHIFT) @@ -1265,228 +1179,36 @@ static uint64_t openpic_cpu_read(void *opaque, hwaddr addr, unsigned len) return openpic_cpu_read_internal(opaque, addr, (addr 0x1f000) 12); } -static const MemoryRegionOps openpic_glb_ops_le = { - .write = openpic_gbl_write, - .read = openpic_gbl_read, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { -.min_access_size = 4, -.max_access_size = 4, -}, -}; - static const MemoryRegionOps openpic_glb_ops_be = { .write = openpic_gbl_write, .read = openpic_gbl_read, - .endianness = DEVICE_BIG_ENDIAN, - .impl = { -.min_access_size = 4, -.max_access_size = 4, -}, -}; - -static const MemoryRegionOps openpic_tmr_ops_le = { - .write = openpic_tmr_write, - .read = openpic_tmr_read, - .endianness = DEVICE_LITTLE_ENDIAN, - .impl = { -.min_access_size = 4, -.max_access_size = 4, -}, }; static const MemoryRegionOps openpic_tmr_ops_be = { .write = openpic_tmr_write, .read = openpic_tmr_read, - .endianness = DEVICE_BIG_ENDIAN, - .impl = { -.min_access_size = 4, -
[PATCH 12/17] kvm/ppc/mpic: adapt to kernel style and environment
From: Scott Wood scottw...@freescale.com Remove braces that Linux style doesn't permit, remove space after '*' that Lindent added, keep error/debug strings contiguous, etc. Substitute type names, debug prints, etc. Signed-off-by: Scott Wood scottw...@freescale.com Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/mpic.c | 445 ++- 1 files changed, 208 insertions(+), 237 deletions(-) diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index d6d70a4..1df67ae 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -42,22 +42,22 @@ #define OPENPIC_TMR_REG_SIZE 0x220 #define OPENPIC_MSI_REG_START0x1600 #define OPENPIC_MSI_REG_SIZE 0x200 -#define OPENPIC_SUMMARY_REG_START 0x3800 -#define OPENPIC_SUMMARY_REG_SIZE0x800 +#define OPENPIC_SUMMARY_REG_START0x3800 +#define OPENPIC_SUMMARY_REG_SIZE 0x800 #define OPENPIC_SRC_REG_START0x1 #define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) #define OPENPIC_CPU_REG_START0x2 -#define OPENPIC_CPU_REG_SIZE 0x100 + ((MAX_CPU - 1) * 0x1000) +#define OPENPIC_CPU_REG_SIZE (0x100 + ((MAX_CPU - 1) * 0x1000)) -typedef struct FslMpicInfo { +struct fsl_mpic_info { int max_ext; -} FslMpicInfo; +}; -static FslMpicInfo fsl_mpic_20 = { +static struct fsl_mpic_info fsl_mpic_20 = { .max_ext = 12, }; -static FslMpicInfo fsl_mpic_42 = { +static struct fsl_mpic_info fsl_mpic_42 = { .max_ext = 12, }; @@ -100,44 +100,43 @@ static int get_current_cpu(void) { CPUState *cpu_single_cpu; - if (!cpu_single_env) { + if (!cpu_single_env) return -1; - } cpu_single_cpu = ENV_GET_CPU(cpu_single_env); return cpu_single_cpu-cpu_index; } -static uint32_t openpic_cpu_read_internal(void *opaque, hwaddr addr, int idx); -static void openpic_cpu_write_internal(void *opaque, hwaddr addr, +static uint32_t openpic_cpu_read_internal(void *opaque, gpa_t addr, int idx); +static void openpic_cpu_write_internal(void *opaque, gpa_t addr, uint32_t val, int idx); -typedef enum IRQType { +enum irq_type { IRQ_TYPE_NORMAL = 0, IRQ_TYPE_FSLINT,/* FSL 
internal interrupt -- level only */ IRQ_TYPE_FSLSPECIAL,/* FSL timer/IPI interrupt, edge, no polarity */ -} IRQType; +}; -typedef struct IRQQueue { +struct irq_queue { /* Round up to the nearest 64 IRQs so that the queue length * won't change when moving between 32 and 64 bit hosts. */ unsigned long queue[BITS_TO_LONGS((MAX_IRQ + 63) ~63)]; int next; int priority; -} IRQQueue; +}; -typedef struct IRQSource { +struct irq_source { uint32_t ivpr; /* IRQ vector/priority register */ uint32_t idr; /* IRQ destination register */ uint32_t destmask; /* bitmap of CPU destinations */ int last_cpu; int output; /* IRQ level, e.g. OPENPIC_OUTPUT_INT */ int pending;/* TRUE if IRQ is pending */ - IRQType type; + enum irq_type type; bool level:1; /* level-triggered */ - bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ -} IRQSource; + bool nomask:1; /* critical interrupts ignore mask on some FSL MPICs */ +}; #define IVPR_MASK_SHIFT 31 #define IVPR_MASK_MASK(1 IVPR_MASK_SHIFT) @@ -158,22 +157,19 @@ typedef struct IRQSource { #define IDR_EP 0x8000 /* external pin */ #define IDR_CI 0x4000 /* critical interrupt */ -typedef struct IRQDest { +struct irq_dest { int32_t ctpr; /* CPU current task priority */ - IRQQueue raised; - IRQQueue servicing; + struct irq_queue raised; + struct irq_queue servicing; qemu_irq *irqs; /* Count of IRQ sources asserting on non-INT outputs */ uint32_t outputs_active[OPENPIC_OUTPUT_NB]; -} IRQDest; - -typedef struct OpenPICState { - SysBusDevice busdev; - MemoryRegion mem; +}; +struct openpic { /* Behavior control */ - FslMpicInfo *fsl; + struct fsl_mpic_info *fsl; uint32_t model; uint32_t flags; uint32_t nb_irqs; @@ -186,9 +182,6 @@ typedef struct OpenPICState { uint32_t brr1; uint32_t mpic_mode_mask; - /* Sub-regions */ - MemoryRegion sub_io_mem[6]; - /* Global registers */ uint32_t frr; /* Feature reporting register */ uint32_t gcr; /* Global configuration register */ @@ -196,9 +189,9 @@ typedef struct OpenPICState { uint32_t spve; /* 
Spurious vector register */ uint32_t tfrr; /* Timer frequency reporting register */ /* Source registers */ - IRQSource src[MAX_IRQ]; + struct irq_source src[MAX_IRQ]; /* Local
[PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
Now that all the irq routing and irqfd pieces are generic, we can expose real irqchip support to all of KVM's internal helpers. This allows us to use irqfd with the in-kernel MPIC. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm_host.h |7 ++ arch/powerpc/include/uapi/asm/kvm.h |1 + arch/powerpc/kvm/Kconfig|3 + arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/irq.h | 17 ++ arch/powerpc/kvm/mpic.c | 106 +++ 6 files changed, 135 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/kvm/irq.h diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 36368c9..d5fbb4b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include linux/mmu_notifier.h @@ -256,6 +260,9 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; #endif +#ifdef CONFIG_KVM_MPIC + void *mpic; +#endif }; /* diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 36be2fe..3537bf3 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -25,6 +25,7 @@ /* Select powerpc specific features in linux/kvm.h */ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT +#define __KVM_HAVE_IRQCHIP struct kvm_regs { __u64 pc; diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 938a729..a608570 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -154,6 +154,9 @@ config KVM_E500MC config KVM_MPIC bool KVM in-kernel MPIC emulation depends on KVM + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING + select HAVE_KVM_MSI help Enable support for emulating MPIC devices inside the host kernel, rather than relying on userspace to emulate. 
diff --git a/arch/powerpc/kvm/Makefile b/arch/powerpc/kvm/Makefile index 4a2277a..4eada0c 100644 --- a/arch/powerpc/kvm/Makefile +++ b/arch/powerpc/kvm/Makefile @@ -104,6 +104,7 @@ kvm-book3s_32-objs := \ kvm-objs-$(CONFIG_KVM_BOOK3S_32) := $(kvm-book3s_32-objs) kvm-objs-$(CONFIG_KVM_MPIC) += mpic.o +kvm-objs-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(addprefix ../../../virt/kvm/, irqchip.o) kvm-objs := $(kvm-objs-m) $(kvm-objs-y) diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000..f1e27fd --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,17 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include linux/kvm_host.h + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm-arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} + +#endif diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b1ae02d..c8de2f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1087,6 +1087,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, } IRQ_resetbit(dst-servicing, s_IRQ); + /* Notify listeners that the IRQ is over */ + kvm_notify_acked_irq(opp-kvm, 0, s_IRQ); /* Set up next servicing IRQ */ s_IRQ = IRQ_get_next(opp, dst-servicing); /* Check queued interrupts. 
*/ @@ -1639,14 +1641,42 @@ static void mpic_destroy(struct kvm_device *dev) unmap_mmio(opp); } + dev-kvm-arch.mpic = NULL; kfree(opp); } +static int mpic_set_default_irq_routing(struct openpic *opp) +{ + int i; + struct kvm_irq_routing_entry *routing; + + /* XXX be more dynamic if we ever want to support multiple MPIC chips */ + routing = kzalloc((sizeof(*routing) * opp-nb_irqs), GFP_KERNEL); + if (!routing) + return -ENOMEM; + + for (i = 0; i opp-nb_irqs; i++) { + routing[i].gsi = i; + routing[i].type = KVM_IRQ_ROUTING_IRQCHIP; + routing[i].u.irqchip.irqchip = 0; + routing[i].u.irqchip.pin = i; + } + + kvm_set_irq_routing(opp-kvm, routing, opp-nb_irqs, 0); + + kfree(routing); + return 0; +} + static int mpic_create(struct kvm_device *dev, u32 type) { struct openpic *opp; int ret; + /* We only support one MPIC at a time for now */ + if (dev-kvm-arch.mpic) + return -EINVAL; + opp = kzalloc(sizeof(struct openpic), GFP_KERNEL); if (!opp) return -ENOMEM; @@ -1691,10 +1721,18 @@ static int mpic_create(struct kvm_device *dev, u32 type) goto err;
[PATCH 10/17] kvm/ppc/mpic: import hw/openpic.c from QEMU
From: Scott Wood scottw...@freescale.com This is QEMU's hw/openpic.c from commit abd8d4a4d6dfea7ddea72f095f993e1de941614e (Update version for 1.4.0-rc0), run through Lindent with no other changes to ease merging future changes between Linux and QEMU. Remaining style issues (including those introduced by Lindent) will be fixed in a later patch. Signed-off-by: Scott Wood scottw...@freescale.com Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/kvm/mpic.c | 1686 +++ 1 files changed, 1686 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/kvm/mpic.c diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c new file mode 100644 index 000..57655b9 --- /dev/null +++ b/arch/powerpc/kvm/mpic.c @@ -0,0 +1,1686 @@ +/* + * OpenPIC emulation + * + * Copyright (c) 2004 Jocelyn Mayer + * 2011 Alexander Graf + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the Software), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. 
+ */ +/* + * + * Based on OpenPic implementations: + * - Intel GW80314 I/O companion chip developer's manual + * - Motorola MPC8245 MPC8540 user manuals. + * - Motorola MCP750 (aka Raven) programmer manual. + * - Motorola Harrier programmer manuel + * + * Serial interrupts, as implemented in Raven chipset are not supported yet. + * + */ +#include hw.h +#include ppc/mac.h +#include pci/pci.h +#include openpic.h +#include sysbus.h +#include pci/msi.h +#include qemu/bitops.h +#include ppc.h + +//#define DEBUG_OPENPIC + +#ifdef DEBUG_OPENPIC +static const int debug_openpic = 1; +#else +static const int debug_openpic = 0; +#endif + +#define DPRINTF(fmt, ...) do { \ +if (debug_openpic) { \ +printf(fmt , ## __VA_ARGS__); \ +} \ +} while (0) + +#define MAX_CPU 32 +#define MAX_SRC 256 +#define MAX_TMR 4 +#define MAX_IPI 4 +#define MAX_MSI 8 +#define MAX_IRQ (MAX_SRC + MAX_IPI + MAX_TMR) +#define VID 0x03 /* MPIC version ID */ + +/* OpenPIC capability flags */ +#define OPENPIC_FLAG_IDR_CRIT (1 0) +#define OPENPIC_FLAG_ILR (2 0) + +/* OpenPIC address map */ +#define OPENPIC_GLB_REG_START0x0 +#define OPENPIC_GLB_REG_SIZE 0x10F0 +#define OPENPIC_TMR_REG_START0x10F0 +#define OPENPIC_TMR_REG_SIZE 0x220 +#define OPENPIC_MSI_REG_START0x1600 +#define OPENPIC_MSI_REG_SIZE 0x200 +#define OPENPIC_SUMMARY_REG_START 0x3800 +#define OPENPIC_SUMMARY_REG_SIZE0x800 +#define OPENPIC_SRC_REG_START0x1 +#define OPENPIC_SRC_REG_SIZE (MAX_SRC * 0x20) +#define OPENPIC_CPU_REG_START0x2 +#define OPENPIC_CPU_REG_SIZE 0x100 + ((MAX_CPU - 1) * 0x1000) + +/* Raven */ +#define RAVEN_MAX_CPU 2 +#define RAVEN_MAX_EXT 48 +#define RAVEN_MAX_IRQ 64 +#define RAVEN_MAX_TMR MAX_TMR +#define RAVEN_MAX_IPI MAX_IPI + +/* Interrupt definitions */ +#define RAVEN_FE_IRQ (RAVEN_MAX_EXT) /* Internal functional IRQ */ +#define RAVEN_ERR_IRQ(RAVEN_MAX_EXT + 1) /* Error IRQ */ +#define RAVEN_TMR_IRQ(RAVEN_MAX_EXT + 2) /* First timer IRQ */ +#define RAVEN_IPI_IRQ(RAVEN_TMR_IRQ + RAVEN_MAX_TMR) /* First IPI IRQ */ +/* First 
doorbell IRQ */ +#define RAVEN_DBL_IRQ(RAVEN_IPI_IRQ + (RAVEN_MAX_CPU * RAVEN_MAX_IPI)) + +typedef struct FslMpicInfo { + int max_ext; +} FslMpicInfo; + +static FslMpicInfo fsl_mpic_20 = { + .max_ext = 12, +}; + +static FslMpicInfo fsl_mpic_42 = { + .max_ext = 12, +}; + +#define FRR_NIRQ_SHIFT16 +#define FRR_NCPU_SHIFT 8 +#define FRR_VID_SHIFT 0 + +#define VID_REVISION_1_2 2 +#define VID_REVISION_1_3 3 + +#define VIR_GENERIC 0x/* Generic Vendor ID */ + +#define GCR_RESET0x8000 +#define GCR_MODE_PASS0x +#define GCR_MODE_MIXED 0x2000 +#define GCR_MODE_PROXY 0x6000 + +#define TBCR_CI 0x8000 /* count inhibit */ +#define TCCR_TOG 0x8000 /* toggles
[PATCH 14/17] kvm/ppc/mpic: add KVM_CAP_IRQ_MPIC
From: Scott Wood scottw...@freescale.com Enabling this capability connects the vcpu to the designated in-kernel MPIC. Using explicit connections between vcpus and irqchips allows for flexibility, but the main benefit at the moment is that it simplifies the code -- KVM doesn't need vm-global state to remember which MPIC object is associated with this vm, and it doesn't need to care about ordering between irqchip creation and vcpu creation. Signed-off-by: Scott Wood scottw...@freescale.com [agraf: add stub functions for kvmppc_mpic_{dis,}connect_vcpu] Signed-off-by: Alexander Graf ag...@suse.de --- Documentation/virtual/kvm/api.txt |8 +++ arch/powerpc/include/asm/kvm_host.h |9 arch/powerpc/include/asm/kvm_ppc.h | 15 ++- arch/powerpc/kvm/booke.c|4 ++ arch/powerpc/kvm/mpic.c | 82 --- arch/powerpc/kvm/powerpc.c | 30 + include/uapi/linux/kvm.h|1 + 7 files changed, 141 insertions(+), 8 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index d52f3f9..4c326ae 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2728,3 +2728,11 @@ to receive the topmost interrupt vector. When disabled (args[0] == 0), behavior is as if this facility is unsupported. When this capability is enabled, KVM_EXIT_EPR can occur. + +6.6 KVM_CAP_IRQ_MPIC + +Architectures: ppc +Parameters: args[0] is the MPIC device fd +args[1] is the MPIC CPU number for this vcpu + +This capability connects the vcpu to an in-kernel MPIC device. 
diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 7e7aef9..36368c9 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -375,6 +375,11 @@ struct kvmppc_booke_debug_reg { u64 dac[KVMPPC_BOOKE_MAX_DAC]; }; +#define KVMPPC_IRQ_DEFAULT 0 +#define KVMPPC_IRQ_MPIC1 + +struct openpic; + struct kvm_vcpu_arch { ulong host_stack; u32 host_pid; @@ -554,6 +559,10 @@ struct kvm_vcpu_arch { unsigned long magic_page_pa; /* phys addr to map the magic page to */ unsigned long magic_page_ea; /* effect. addr to map the magic page to */ + int irq_type; /* one of KVM_IRQ_* */ + int irq_cpu_id; + struct openpic *mpic; /* KVM_IRQ_MPIC */ + #ifdef CONFIG_KVM_BOOK3S_64_HV struct kvm_vcpu_arch_shared shregs; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 0b86604..c9d9faf 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -248,7 +248,6 @@ int kvmppc_set_one_reg(struct kvm_vcpu *vcpu, u64 id, union kvmppc_one_reg *); void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 pid); struct openpic; -void kvmppc_mpic_put(struct openpic *opp); #ifdef CONFIG_KVM_BOOK3S_64_HV static inline void kvmppc_set_xics_phys(int cpu, unsigned long addr) @@ -278,6 +277,9 @@ static inline void kvmppc_set_epr(struct kvm_vcpu *vcpu, u32 epr) #ifdef CONFIG_KVM_MPIC void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu); +int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, struct kvm_vcpu *vcpu, +u32 cpu); +void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, struct kvm_vcpu *vcpu); #else @@ -285,6 +287,17 @@ static inline void kvmppc_mpic_set_epr(struct kvm_vcpu *vcpu) { } +static inline int kvmppc_mpic_connect_vcpu(struct kvm_device *dev, + struct kvm_vcpu *vcpu, u32 cpu) +{ + return -EINVAL; +} + +static inline void kvmppc_mpic_disconnect_vcpu(struct openpic *opp, + struct kvm_vcpu *vcpu) +{ +} + #endif /* CONFIG_KVM_MPIC */ int kvm_vcpu_ioctl_config_tlb(struct 
kvm_vcpu *vcpu, diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index cff53d4..0097912 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -430,6 +430,10 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu *vcpu, if (update_epr == true) { if (vcpu-arch.epr_flags KVMPPC_EPR_USER) kvm_make_request(KVM_REQ_EPR_EXIT, vcpu); + else if (vcpu-arch.epr_flags KVMPPC_EPR_KERNEL) { + BUG_ON(vcpu-arch.irq_type != KVMPPC_IRQ_MPIC); + kvmppc_mpic_set_epr(vcpu); + } } new_msr = msr_mask; diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index 60857d5..b1ae02d 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -115,7 +115,7 @@ static int get_current_cpu(void) { #if defined(CONFIG_KVM) defined(CONFIG_BOOKE) struct kvm_vcpu *vcpu = current-thread.kvm_vcpu; - return vcpu ? vcpu-vcpu_id : -1; + return vcpu ? vcpu-arch.irq_cpu_id : -1; #else
[PATCH 04/17] KVM: Remove kvm_get_intr_delivery_bitmask
The prototype has been stale for a while; I can't spot any real function definition behind it. Let's just remove it. Signed-off-by: Alexander Graf ag...@suse.de --- include/linux/kvm_host.h |5 - 1 files changed, 0 insertions(+), 5 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4215d4f..a7bfe9d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -719,11 +719,6 @@ void kvm_unregister_irq_mask_notifier(struct kvm *kvm, int irq, void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, bool mask); -#ifdef __KVM_HAVE_IOAPIC -void kvm_get_intr_delivery_bitmask(struct kvm_ioapic *ioapic, - union kvm_ioapic_redirect_entry *entry, - unsigned long *deliver_bitmask); -#endif int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/17] KVM: Move irq routing setup to irqchip.c
Setting up IRQ routes is nothing IOAPIC specific. Extract everything that really is generic code into irqchip.c and only leave the ioapic specific bits to irq_comm.c. Signed-off-by: Alexander Graf ag...@suse.de --- include/linux/kvm_host.h |3 ++ virt/kvm/irq_comm.c | 76 ++--- virt/kvm/irqchip.c | 85 ++ 3 files changed, 91 insertions(+), 73 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7bfe9d..dcef724 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -961,6 +961,9 @@ int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, unsigned flags); +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue); void kvm_free_irq_routing(struct kvm *kvm); int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index d5008f4..e2e6b44 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -271,27 +271,14 @@ void kvm_fire_mask_notifiers(struct kvm *kvm, unsigned irqchip, unsigned pin, rcu_read_unlock(); } -static int setup_routing_entry(struct kvm_irq_routing_table *rt, - struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { int r = -EINVAL; int delta; unsigned max_pin; - struct kvm_kernel_irq_routing_entry *ei; - /* -* Do not allow GSI to be mapped to the same irqchip more than once. -* Allow only one to one mapping between GSI and MSI. 
-*/ - hlist_for_each_entry(ei, rt-map[ue-gsi], link) - if (ei-type == KVM_IRQ_ROUTING_MSI || - ue-type == KVM_IRQ_ROUTING_MSI || - ue-u.irqchip.irqchip == ei-irqchip.irqchip) - return r; - - e-gsi = ue-gsi; - e-type = ue-type; switch (ue-type) { case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; @@ -328,68 +315,11 @@ static int setup_routing_entry(struct kvm_irq_routing_table *rt, goto out; } - hlist_add_head(e-link, rt-map[e-gsi]); r = 0; out: return r; } -int kvm_set_irq_routing(struct kvm *kvm, - const struct kvm_irq_routing_entry *ue, - unsigned nr, - unsigned flags) -{ - struct kvm_irq_routing_table *new, *old; - u32 i, j, nr_rt_entries = 0; - int r; - - for (i = 0; i nr; ++i) { - if (ue[i].gsi = KVM_MAX_IRQ_ROUTES) - return -EINVAL; - nr_rt_entries = max(nr_rt_entries, ue[i].gsi); - } - - nr_rt_entries += 1; - - new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)) - + (nr * sizeof(struct kvm_kernel_irq_routing_entry)), - GFP_KERNEL); - - if (!new) - return -ENOMEM; - - new-rt_entries = (void *)new-map[nr_rt_entries]; - - new-nr_rt_entries = nr_rt_entries; - for (i = 0; i 3; i++) - for (j = 0; j KVM_IRQCHIP_NUM_PINS; j++) - new-chip[i][j] = -1; - - for (i = 0; i nr; ++i) { - r = -EINVAL; - if (ue-flags) - goto out; - r = setup_routing_entry(new, new-rt_entries[i], ue); - if (r) - goto out; - ++ue; - } - - mutex_lock(kvm-irq_lock); - old = kvm-irq_routing; - kvm_irq_routing_update(kvm, new); - mutex_unlock(kvm-irq_lock); - - synchronize_rcu(); - - new = old; - r = 0; - -out: - kfree(new); - return r; -} - #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 12f7f26..20dc9e4 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -150,3 +150,88 @@ void kvm_free_irq_routing(struct kvm *kvm) at this stage */ kfree(kvm-irq_routing); } + +static int setup_routing_entry(struct kvm_irq_routing_table 
*rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + struct kvm_kernel_irq_routing_entry *ei; + + /* +
[PATCH 00/17] KVM: PPC: In-kernel MPIC support with irqfd
Hi, This patch set contains a fully working implementation of the in-kernel MPIC from Scott with a few fixups and a new version of my irqfd generalization patch set. Major changes to my last irqfd set are: - depend on CONFIG_ defines rather than __KVM defines - fix compile issues - fix the kvm_irqchip{,s} typo I have refrained from touching IA64 at all in this patch set. It's marked as BROKEN, I doubt it even compiles at all today. The only sensible thing to do would be to remove all of IA64 kvm code from the kernel tree, but that is out of scope for this patch set and definitely should not gate it. Alex Alexander Graf (11): KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS KVM: Introduce CONFIG_HAVE_KVM_IRQ_ROUTING KVM: Drop __KVM_HAVE_IOAPIC condition on irq routing KVM: Remove kvm_get_intr_delivery_bitmask KVM: Move irq routing to generic code KVM: Extract generic irqchip logic into irqchip.c KVM: Move irq routing setup to irqchip.c KVM: Move irqfd resample cap handling to generic code KVM: PPC: Support irq routing and irqfd for in-kernel MPIC KVM: PPC: MPIC: Add support for KVM_IRQ_LINE KVM: PPC: MPIC: Restrict to e500 platforms Scott Wood (6): kvm: add device control API kvm/ppc/mpic: import hw/openpic.c from QEMU kvm/ppc/mpic: remove some obviously unneeded code kvm/ppc/mpic: adapt to kernel style and environment kvm/ppc/mpic: in-kernel MPIC emulation kvm/ppc/mpic: add KVM_CAP_IRQ_MPIC Documentation/virtual/kvm/api.txt | 78 ++ Documentation/virtual/kvm/devices/README |1 + Documentation/virtual/kvm/devices/mpic.txt | 37 + arch/powerpc/include/asm/kvm_host.h| 24 +- arch/powerpc/include/asm/kvm_ppc.h | 30 + arch/powerpc/include/uapi/asm/kvm.h|9 + arch/powerpc/kvm/Kconfig | 12 + arch/powerpc/kvm/Makefile |3 + arch/powerpc/kvm/booke.c | 12 +- arch/powerpc/kvm/irq.h | 17 + arch/powerpc/kvm/mpic.c| 1869 arch/powerpc/kvm/powerpc.c | 55 +- arch/x86/include/asm/kvm_host.h|2 + arch/x86/kvm/Kconfig |1 + arch/x86/kvm/Makefile |2 +- arch/x86/kvm/x86.c |1 - 
include/linux/kvm_host.h | 53 +- include/trace/events/kvm.h | 12 +- include/uapi/linux/kvm.h | 33 +- virt/kvm/Kconfig |3 + virt/kvm/assigned-dev.c| 30 - virt/kvm/eventfd.c |6 +- virt/kvm/irq_comm.c| 194 +--- virt/kvm/irqchip.c | 237 virt/kvm/kvm_main.c| 170 +++- 25 files changed, 2641 insertions(+), 250 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/README create mode 100644 Documentation/virtual/kvm/devices/mpic.txt create mode 100644 arch/powerpc/kvm/irq.h create mode 100644 arch/powerpc/kvm/mpic.c create mode 100644 virt/kvm/irqchip.c -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/17] KVM: Move irqfd resample cap handling to generic code
Now that we have most irqfd code completely platform agnostic, let's move irqfd's resample capability return to generic code as well. Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/x86.c |1 - virt/kvm/kvm_main.c |3 +++ 2 files changed, 3 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 50e2e10..888d892 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2513,7 +2513,6 @@ int kvm_dev_ioctl_check_extension(long ext) case KVM_CAP_PCI_2_3: case KVM_CAP_KVMCLOCK_CTRL: case KVM_CAP_READONLY_MEM: - case KVM_CAP_IRQFD_RESAMPLE: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index b6f3354..f9492f3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2433,6 +2433,9 @@ static long kvm_dev_ioctl_check_extension_generic(long arg) #ifdef CONFIG_HAVE_KVM_MSI case KVM_CAP_SIGNAL_MSI: #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_CAP_IRQFD_RESAMPLE: +#endif return 1; #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING case KVM_CAP_IRQ_ROUTING: -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/17] kvm: add device control API
From: Scott Wood scottw...@freescale.com Currently, devices that are emulated inside KVM are configured in a hardcoded manner based on an assumption that any given architecture only has one way to do it. If there's any need to access device state, it is done through inflexible one-purpose-only IOCTLs (e.g. KVM_GET/SET_LAPIC). Defining new IOCTLs for every little thing is cumbersome and depletes a limited numberspace. This API provides a mechanism to instantiate a device of a certain type, returning an ID that can be used to set/get attributes of the device. Attributes may include configuration parameters (e.g. register base address), device state, operational commands, etc. It is similar to the ONE_REG API, except that it acts on devices rather than vcpus. Both device types and individual attributes can be tested without having to create the device or get/set the attribute, without the need for separately managing enumerated capabilities. Signed-off-by: Scott Wood scottw...@freescale.com Signed-off-by: Alexander Graf ag...@suse.de --- Documentation/virtual/kvm/api.txt| 70 Documentation/virtual/kvm/devices/README |1 + include/linux/kvm_host.h | 35 include/uapi/linux/kvm.h | 27 ++ virt/kvm/kvm_main.c | 129 ++ 5 files changed, 262 insertions(+), 0 deletions(-) create mode 100644 Documentation/virtual/kvm/devices/README diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 976eb65..d52f3f9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2173,6 +2173,76 @@ header; first `n_valid' valid entries with contents from the data written, then `n_invalid' invalid entries, invalidating any previously valid entries found. 
+4.79 KVM_CREATE_DEVICE + +Capability: KVM_CAP_DEVICE_CTRL +Type: vm ioctl +Parameters: struct kvm_create_device (in/out) +Returns: 0 on success, -1 on error +Errors: + ENODEV: The device type is unknown or unsupported + EEXIST: Device already created, and this type of device may not + be instantiated multiple times + + Other error conditions may be defined by individual device types or + have their standard meanings. + +Creates an emulated device in the kernel. The file descriptor returned +in fd can be used with KVM_SET/GET/HAS_DEVICE_ATTR. + +If the KVM_CREATE_DEVICE_TEST flag is set, only test whether the +device type is supported (not necessarily whether it can be created +in the current vm). + +Individual devices should not define flags. Attributes should be used +for specifying any behavior that is not implied by the device type +number. + +struct kvm_create_device { + __u32 type; /* in: KVM_DEV_TYPE_xxx */ + __u32 fd; /* out: device handle */ + __u32 flags; /* in: KVM_CREATE_DEVICE_xxx */ +}; + +4.80 KVM_SET_DEVICE_ATTR/KVM_GET_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + EPERM: The attribute cannot (currently) be accessed this way + (e.g. read-only attribute, or attribute that only makes + sense when the device is in a different state) + + Other error conditions may be defined by individual device types. + +Gets/sets a specified piece of device configuration and/or state. The +semantics are device-specific. See individual device documentation in +the devices directory. As with ONE_REG, the size of the data +transferred is defined by the particular attribute. 
+ +struct kvm_device_attr { + __u32 flags; /* no flags currently defined */ + __u32 group; /* device-defined */ + __u64 attr; /* group-defined */ + __u64 addr; /* userspace address of attr data */ +}; + +4.81 KVM_HAS_DEVICE_ATTR + +Capability: KVM_CAP_DEVICE_CTRL +Type: device ioctl +Parameters: struct kvm_device_attr +Returns: 0 on success, -1 on error +Errors: + ENXIO: The group or attribute is unknown/unsupported for this device + +Tests whether a device supports a particular attribute. A successful +return indicates the attribute is implemented. It does not necessarily +indicate that the attribute can be read or written in the device's +current state. addr is ignored. 4.77 KVM_ARM_VCPU_INIT diff --git a/Documentation/virtual/kvm/devices/README b/Documentation/virtual/kvm/devices/README new file mode 100644 index 000..34a6983 --- /dev/null +++ b/Documentation/virtual/kvm/devices/README @@ -0,0 +1 @@ +This directory contains specific device bindings for KVM_CAP_DEVICE_CTRL. diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index dcef724..6dab6b5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1064,6 +1064,41 @@ static inline bool
[PATCH 16/17] KVM: PPC: MPIC: Add support for KVM_IRQ_LINE
Now that all pieces are in place for reusing generic irq infrastructure, we can copy x86's implementation of KVM_IRQ_LINE irq injection and simply reuse it for PPC, as it will work there just as well. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/uapi/asm/kvm.h |1 + arch/powerpc/kvm/powerpc.c | 13 + 2 files changed, 14 insertions(+), 0 deletions(-) diff --git a/arch/powerpc/include/uapi/asm/kvm.h b/arch/powerpc/include/uapi/asm/kvm.h index 3537bf3..dbb2ac2 100644 --- a/arch/powerpc/include/uapi/asm/kvm.h +++ b/arch/powerpc/include/uapi/asm/kvm.h @@ -26,6 +26,7 @@ #define __KVM_HAVE_SPAPR_TCE #define __KVM_HAVE_PPC_SMT #define __KVM_HAVE_IRQCHIP +#define __KVM_HAVE_IRQ_LINE struct kvm_regs { __u64 pc; diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index c431fea..874c106 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -33,6 +33,7 @@ #include <asm/cputhreads.h> #include <asm/irqflags.h> #include "timing.h" +#include "irq.h" #include "../mm/mmu_decl.h" #define CREATE_TRACE_POINTS @@ -945,6 +946,18 @@ static int kvm_vm_ioctl_get_pvinfo(struct kvm_ppc_pvinfo *pvinfo) return 0; } +int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, + bool line_status) +{ + if (!irqchip_in_kernel(kvm)) + return -ENXIO; + + irq_event->status = kvm_set_irq(kvm, KVM_USERSPACE_IRQ_SOURCE_ID, + irq_event->irq, irq_event->level, + line_status); + return 0; +} + long kvm_arch_vm_ioctl(struct file *filp, unsigned int ioctl, unsigned long arg) { -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/17] KVM: Extract generic irqchip logic into irqchip.c
The current irq_comm.c file contains pieces of code that are generic across different irqchip implementations, as well as code that is fully IOAPIC specific. Split the generic bits out into irqchip.c. Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/kvm/Makefile |2 +- include/trace/events/kvm.h | 12 +++- virt/kvm/irq_comm.c| 118 -- virt/kvm/irqchip.c | 152 4 files changed, 163 insertions(+), 121 deletions(-) create mode 100644 virt/kvm/irqchip.c diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 04d3040..a797b8e 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,7 +7,7 @@ CFLAGS_vmx.o := -I. kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o eventfd.o \ - assigned-dev.o) + assigned-dev.o irqchip.o) kvm-$(CONFIG_IOMMU_API)+= $(addprefix ../../../virt/kvm/, iommu.o) kvm-$(CONFIG_KVM_ASYNC_PF) += $(addprefix ../../../virt/kvm/, async_pf.o) diff --git a/include/trace/events/kvm.h b/include/trace/events/kvm.h index 19911dd..7005d11 100644 --- a/include/trace/events/kvm.h +++ b/include/trace/events/kvm.h @@ -37,7 +37,7 @@ TRACE_EVENT(kvm_userspace_exit, __entry-errno 0 ? 
-__entry-errno : __entry-reason) ); -#if defined(__KVM_HAVE_IRQ_LINE) +#if defined(CONFIG_HAVE_KVM_IRQCHIP) TRACE_EVENT(kvm_set_irq, TP_PROTO(unsigned int gsi, int level, int irq_source_id), TP_ARGS(gsi, level, irq_source_id), @@ -122,6 +122,10 @@ TRACE_EVENT(kvm_msi_set_irq, {KVM_IRQCHIP_PIC_SLAVE, PIC slave}, \ {KVM_IRQCHIP_IOAPIC,IOAPIC} +#endif /* defined(__KVM_HAVE_IOAPIC) */ + +#if defined(CONFIG_HAVE_KVM_IRQCHIP) + TRACE_EVENT(kvm_ack_irq, TP_PROTO(unsigned int irqchip, unsigned int pin), TP_ARGS(irqchip, pin), @@ -136,14 +140,18 @@ TRACE_EVENT(kvm_ack_irq, __entry-pin= pin; ), +#ifdef kvm_irqchips TP_printk(irqchip %s pin %u, __print_symbolic(__entry-irqchip, kvm_irqchips), __entry-pin) +#else + TP_printk(irqchip %d pin %u, __entry-irqchip, __entry-pin) +#endif ); +#endif /* defined(CONFIG_HAVE_KVM_IRQCHIP) */ -#endif /* defined(__KVM_HAVE_IOAPIC) */ #define KVM_TRACE_MMIO_READ_UNSATISFIED 0 #define KVM_TRACE_MMIO_READ 1 diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 7c0071d..d5008f4 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -151,59 +151,6 @@ static int kvm_set_msi_inatomic(struct kvm_kernel_irq_routing_entry *e, return -EWOULDBLOCK; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - struct kvm_kernel_irq_routing_entry route; - - if (!irqchip_in_kernel(kvm) || msi-flags != 0) - return -EINVAL; - - route.msi.address_lo = msi-address_lo; - route.msi.address_hi = msi-address_hi; - route.msi.data = msi-data; - - return kvm_set_msi(route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); -} - -/* - * Return value: - * 0 Interrupt was ignored (masked or not delivered for other reasons) - * = 0 Interrupt was coalesced (previous irq is still pending) - * 0 Number of CPUs interrupt was delivered to - */ -int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, - bool line_status) -{ - struct kvm_kernel_irq_routing_entry *e, irq_set[KVM_NR_IRQCHIPS]; - int ret = -1, i = 0; - struct 
kvm_irq_routing_table *irq_rt; - - trace_kvm_set_irq(irq, level, irq_source_id); - - /* Not possible to detect if the guest uses the PIC or the -* IOAPIC. So set the bit in both. The guest will ignore -* writes to the unused one. -*/ - rcu_read_lock(); - irq_rt = rcu_dereference(kvm-irq_routing); - if (irq irq_rt-nr_rt_entries) - hlist_for_each_entry(e, irq_rt-map[irq], link) - irq_set[i++] = *e; - rcu_read_unlock(); - - while(i--) { - int r; - r = irq_set[i].set(irq_set[i], kvm, irq_source_id, level, - line_status); - if (r 0) - continue; - - ret = r + ((ret 0) ? 0 : ret); - } - - return ret; -} - /* * Deliver an IRQ in an atomic context if we can, or return a failure, * user can retry in a process context. @@ -241,63 +188,6 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) return ret; } -bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin) -{ - struct kvm_irq_ack_notifier *kian; - int gsi; - - rcu_read_lock(); - gsi =
[PATCH 01/17] KVM: Add KVM_IRQCHIP_NUM_PINS in addition to KVM_IOAPIC_NUM_PINS
The concept of routing interrupt lines to an irqchip is nothing that is IOAPIC specific. Every irqchip has a maximum number of pins that can be linked to irq lines. So let's add a new define that allows us to reuse generic code for non-IOAPIC platforms. Signed-off-by: Alexander Graf ag...@suse.de --- arch/x86/include/asm/kvm_host.h |2 ++ include/linux/kvm_host.h|2 +- virt/kvm/irq_comm.c |2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 599f98b..f44c3fe 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -43,6 +43,8 @@ #define KVM_PIO_PAGE_OFFSET 1 #define KVM_COALESCED_MMIO_PAGE_OFFSET 2 +#define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS + #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 93a5005..bf3b1dc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,7 +307,7 @@ struct kvm_kernel_irq_routing_entry { #ifdef __KVM_HAVE_IOAPIC struct kvm_irq_routing_table { - int chip[KVM_NR_IRQCHIPS][KVM_IOAPIC_NUM_PINS]; + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; struct kvm_kernel_irq_routing_entry *rt_entries; u32 nr_rt_entries; /* diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index 25ab480..7c0071d 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -480,7 +480,7 @@ int kvm_set_irq_routing(struct kvm *kvm, new-nr_rt_entries = nr_rt_entries; for (i = 0; i 3; i++) - for (j = 0; j KVM_IOAPIC_NUM_PINS; j++) + for (j = 0; j KVM_IRQCHIP_NUM_PINS; j++) new-chip[i][j] = -1; for (i = 0; i nr; ++i) { -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/17] KVM: Move irq routing to generic code
The IRQ routing set ioctl lives in the hacky device assignment code inside of KVM today. This is definitely the wrong place for it. Move it to the much more natural kvm_main.c. Signed-off-by: Alexander Graf ag...@suse.de --- virt/kvm/assigned-dev.c | 30 -- virt/kvm/kvm_main.c | 30 ++ 2 files changed, 30 insertions(+), 30 deletions(-) diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c index f4c7f59..8db4370 100644 --- a/virt/kvm/assigned-dev.c +++ b/virt/kvm/assigned-dev.c @@ -983,36 +983,6 @@ long kvm_vm_ioctl_assigned_device(struct kvm *kvm, unsigned ioctl, goto out; break; } -#ifdef KVM_CAP_IRQ_ROUTING - case KVM_SET_GSI_ROUTING: { - struct kvm_irq_routing routing; - struct kvm_irq_routing __user *urouting; - struct kvm_irq_routing_entry *entries; - - r = -EFAULT; - if (copy_from_user(routing, argp, sizeof(routing))) - goto out; - r = -EINVAL; - if (routing.nr = KVM_MAX_IRQ_ROUTES) - goto out; - if (routing.flags) - goto out; - r = -ENOMEM; - entries = vmalloc(routing.nr * sizeof(*entries)); - if (!entries) - goto out; - r = -EFAULT; - urouting = argp; - if (copy_from_user(entries, urouting-entries, - routing.nr * sizeof(*entries))) - goto out_free_irq_routing; - r = kvm_set_irq_routing(kvm, entries, routing.nr, - routing.flags); - out_free_irq_routing: - vfree(entries); - break; - } -#endif /* KVM_CAP_IRQ_ROUTING */ #ifdef __KVM_HAVE_MSIX case KVM_ASSIGN_SET_MSIX_NR: { struct kvm_assigned_msix_nr entry_nr; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 2c3b226..b6f3354 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2274,6 +2274,36 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING + case KVM_SET_GSI_ROUTING: { + struct kvm_irq_routing routing; + struct kvm_irq_routing __user *urouting; + struct kvm_irq_routing_entry *entries; + + r = -EFAULT; + if (copy_from_user(routing, argp, sizeof(routing))) + goto out; + r = -EINVAL; + if (routing.nr = KVM_MAX_IRQ_ROUTES) + goto out; + 
if (routing.flags) + goto out; + r = -ENOMEM; + entries = vmalloc(routing.nr * sizeof(*entries)); + if (!entries) + goto out; + r = -EFAULT; + urouting = argp; + if (copy_from_user(entries, urouting-entries, + routing.nr * sizeof(*entries))) + goto out_free_irq_routing; + r = kvm_set_irq_routing(kvm, entries, routing.nr, + routing.flags); + out_free_irq_routing: + vfree(entries); + break; + } +#endif /* CONFIG_HAVE_KVM_IRQ_ROUTING */ default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); if (r == -ENOTTY) -- 1.6.0.2 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 17/17] KVM: PPC: MPIC: Restrict to e500 platforms
On 04/18/2013 09:11:57 AM, Alexander Graf wrote: The code as is doesn't make any sense on non-e500 platforms. Why? What actually breaks? You never answered my earlier question about whether 74xx is supported. MPC86xx is 74xx-derived and has an FSL MPIC. Plus, as pointed out earlier, this limits allyesconfig build testing. -Scott -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
On 04/18/2013 09:11:55 AM, Alexander Graf wrote: Now that all the irq routing and irqfd pieces are generic, we can expose real irqchip support to all of KVM's internal helpers. This allows us to use irqfd with the in-kernel MPIC. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm_host.h |7 ++ arch/powerpc/include/uapi/asm/kvm.h |1 + arch/powerpc/kvm/Kconfig|3 + arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/irq.h | 17 ++ arch/powerpc/kvm/mpic.c | 106 +++ 6 files changed, 135 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/kvm/irq.h diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 36368c9..d5fbb4b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include linux/mmu_notifier.h @@ -256,6 +260,9 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; #endif +#ifdef CONFIG_KVM_MPIC + void *mpic; +#endif }; This can be struct openpic *mpic -- we already do this in the vcpu. diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000..f1e27fd --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,17 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include linux/kvm_host.h + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm-arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} Couldn't we just set a non-irqchip-specific bool? Though eventually this shouldn't be needed at all -- instead the check would be does the requested irqchip fd exist and refer to something that exposes an irqchip interface? The rmb needs documentation. I don't see a corresponding wmb before writing to kvm-arch.mpic. 
diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b1ae02d..c8de2f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1087,6 +1087,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, } IRQ_resetbit(dst-servicing, s_IRQ); + /* Notify listeners that the IRQ is over */ + kvm_notify_acked_irq(opp-kvm, 0, s_IRQ); I don't think we want to call that with the lock held -- it looks like it can call back into kvm_set_irq. In our old internal version I waited until the end of the EOI code, and called the notify function with the lock dropped temporarily. /* Set up next servicing IRQ */ s_IRQ = IRQ_get_next(opp, dst-servicing); /* Check queued interrupts. */ @@ -1639,14 +1641,42 @@ static void mpic_destroy(struct kvm_device *dev) unmap_mmio(opp); } + dev-kvm-arch.mpic = NULL; kfree(opp); } +static int mpic_set_default_irq_routing(struct openpic *opp) +{ + int i; + struct kvm_irq_routing_entry *routing; + + /* XXX be more dynamic if we ever want to support multiple MPIC chips */ + routing = kzalloc((sizeof(*routing) * opp-nb_irqs), GFP_KERNEL); + if (!routing) + return -ENOMEM; + + for (i = 0; i opp-nb_irqs; i++) { + routing[i].gsi = i; + routing[i].type = KVM_IRQ_ROUTING_IRQCHIP; + routing[i].u.irqchip.irqchip = 0; + routing[i].u.irqchip.pin = i; + } + + kvm_set_irq_routing(opp-kvm, routing, opp-nb_irqs, 0); + + kfree(routing); + return 0; +} Do we really want any default routes? There's no platform notion of GSI here, so how is userspace to know how the kernel set it up (or what GSIs are free to be used for new routes) -- other than the read the code answer I got when I asked about x86? 
:-P +int kvm_set_routing_entry(struct kvm_irq_routing_table *rt, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +{ + int r = -EINVAL; + + switch (ue-type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e-set = mpic_set_irq; + e-irqchip.irqchip = ue-u.irqchip.irqchip; + e-irqchip.pin = ue-u.irqchip.pin; + if (e-irqchip.pin = KVM_IRQCHIP_NUM_PINS) + goto out; + rt-chip[ue-u.irqchip.irqchip][e-irqchip.pin] = ue-gsi; + break; + case KVM_IRQ_ROUTING_MSI: + e-set = kvm_set_msi; + e-msi.address_lo = ue-u.msi.address_lo; + e-msi.address_hi = ue-u.msi.address_hi; + e-msi.data = ue-u.msi.data; + break; +
Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
On 18.04.2013, at 23:39, Scott Wood wrote: On 04/18/2013 09:11:55 AM, Alexander Graf wrote: Now that all the irq routing and irqfd pieces are generic, we can expose real irqchip support to all of KVM's internal helpers. This allows us to use irqfd with the in-kernel MPIC. Signed-off-by: Alexander Graf ag...@suse.de --- arch/powerpc/include/asm/kvm_host.h |7 ++ arch/powerpc/include/uapi/asm/kvm.h |1 + arch/powerpc/kvm/Kconfig|3 + arch/powerpc/kvm/Makefile |1 + arch/powerpc/kvm/irq.h | 17 ++ arch/powerpc/kvm/mpic.c | 106 +++ 6 files changed, 135 insertions(+), 0 deletions(-) create mode 100644 arch/powerpc/kvm/irq.h diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 36368c9..d5fbb4b 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -44,6 +44,10 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #endif +/* These values are internal and can be increased later */ +#define KVM_NR_IRQCHIPS 1 +#define KVM_IRQCHIP_NUM_PINS 256 + #if !defined(CONFIG_KVM_440) #include linux/mmu_notifier.h @@ -256,6 +260,9 @@ struct kvm_arch { #ifdef CONFIG_PPC_BOOK3S_64 struct list_head spapr_tce_tables; #endif +#ifdef CONFIG_KVM_MPIC +void *mpic; +#endif }; This can be struct openpic *mpic -- we already do this in the vcpu. There was a reason I made it void *, but I don't remember what it was. I can certainly try to make it struct openpic * again :). diff --git a/arch/powerpc/kvm/irq.h b/arch/powerpc/kvm/irq.h new file mode 100644 index 000..f1e27fd --- /dev/null +++ b/arch/powerpc/kvm/irq.h @@ -0,0 +1,17 @@ +#ifndef __IRQ_H +#define __IRQ_H + +#include linux/kvm_host.h + +static inline int irqchip_in_kernel(struct kvm *kvm) +{ +int ret = 0; + +#ifdef CONFIG_KVM_MPIC +ret = ret || (kvm-arch.mpic != NULL); +#endif +smp_rmb(); +return ret; +} Couldn't we just set a non-irqchip-specific bool? 
Though eventually this shouldn't be needed at all -- instead the check would be does the requested irqchip fd exist and refer to something that exposes an irqchip interface? How would we get the irqchip id? The rmb needs documentation. I don't see a corresponding wmb before writing to kvm-arch.mpic. I actually just copied it from the x86 code, wondering what the rmb() is supposed to do here. Do we need this at all? diff --git a/arch/powerpc/kvm/mpic.c b/arch/powerpc/kvm/mpic.c index b1ae02d..c8de2f2 100644 --- a/arch/powerpc/kvm/mpic.c +++ b/arch/powerpc/kvm/mpic.c @@ -1087,6 +1087,8 @@ static int openpic_cpu_write_internal(void *opaque, gpa_t addr, } IRQ_resetbit(dst-servicing, s_IRQ); +/* Notify listeners that the IRQ is over */ +kvm_notify_acked_irq(opp-kvm, 0, s_IRQ); I don't think we want to call that with the lock held -- it looks like it can call back into kvm_set_irq. In our old internal version I waited Yeah, usually it would call into a non-mpic set handler though IIUC, so we're safe. However, if you want to use an MPIC input pin as resample fd, then you'd be lost here. until the end of the EOI code, and called the notify function with the lock dropped temporarily. Sounds like a good idea. /* Set up next servicing IRQ */ s_IRQ = IRQ_get_next(opp, dst-servicing); /* Check queued interrupts. 
*/ @@ -1639,14 +1641,42 @@ static void mpic_destroy(struct kvm_device *dev) unmap_mmio(opp); } +dev-kvm-arch.mpic = NULL; kfree(opp); } +static int mpic_set_default_irq_routing(struct openpic *opp) +{ +int i; +struct kvm_irq_routing_entry *routing; + +/* XXX be more dynamic if we ever want to support multiple MPIC chips */ +routing = kzalloc((sizeof(*routing) * opp-nb_irqs), GFP_KERNEL); +if (!routing) +return -ENOMEM; + +for (i = 0; i opp-nb_irqs; i++) { +routing[i].gsi = i; +routing[i].type = KVM_IRQ_ROUTING_IRQCHIP; +routing[i].u.irqchip.irqchip = 0; +routing[i].u.irqchip.pin = i; +} + +kvm_set_irq_routing(opp-kvm, routing, opp-nb_irqs, 0); + +kfree(routing); +return 0; +} Do we really want any default routes? There's no platform notion of GSI here, so how is userspace to know how the kernel set it up (or what GSIs are free to be used for new routes) -- other than the read the code answer I got when I asked about x86? :-P The default routes really are just expose all pins 1:1 as GSI. I think it makes sense to have a simple default for user space that doesn't want to mess with irq routing. What GSIs are free for new routes doesn't matter. Routes are always completely rewritten as a whole from user space. So when user space goes in and wants to change only a
Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
On 04/18/2013 07:15:46 PM, Alexander Graf wrote: On 18.04.2013, at 23:39, Scott Wood wrote: On 04/18/2013 09:11:55 AM, Alexander Graf wrote: +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm-arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} Couldn't we just set a non-irqchip-specific bool? Though eventually this shouldn't be needed at all -- instead the check would be does the requested irqchip fd exist and refer to something that exposes an irqchip interface? How would we get the irqchip id? I was thinking it would come from whatever operation you're trying to do, though I see that MSI routing doesn't specify an irqchip. :-P In that case I guess it would check for whether any MSI handler has been registered. The rmb needs documentation. I don't see a corresponding wmb before writing to kvm-arch.mpic. I actually just copied it from the x86 code, wondering what the rmb() is supposed to do here. Do we need this at all? Well, we don't want to start using the irqchip before it's been fully initialized -- but if we want to use barriers to accomplish that rather than a lock, there needs to be a wmb on the writing side. How would one create a route for IPIs, timers, etc (we have had customers wanting to assign those to KVM guests)? What about error interrupts on MPIC 4.2? How are you defining the pin field for MPIC? Shouldn't pin is basically what a source line is on the MPIC. That's what the equivalent of an IOAPIC interrupt line would be for the MPIC world. IPIs, timer interrupts and error interrupts are special vectors. We could probably model them as different KVM_IRQ_ROUTING_ types if we ever need to support mapping those to irqfd. Seems a bit heavyweight to add several new MPIC-specific routing types -- maybe just one new KVM_IRQ_ROUTING type that lets multiple words be used to describe the interrupt? For simple injection we can always do an ioctl on the MPIC device. 
I got complaints for that originally. :-) However, I'd be inclined to say that it's rather unlikely you'd want to have a vfio device's interrupt line hooked up to the IPI interrupt of your guest... Well, as I said, we've done it before for a customer (not with VFIO of course, but our old internal-tree device assignment mechanism), so not *that* unlikely. The host was a two-core chip running Linux on only one of the cores, and a custom OS on the other core. They wanted to communicate between the custom OS and a KVM guest. Since host Linux was only running on one core, it didn't need the IPIs for itself (and beginning with e500mc, the host uses msgsnd instead, so there also the host will not need the IPIs for itself). there be an API documentation update for this? Hrm. I can certainly add one. OK. We've had enough confusion from poor documentation in the device tree binding, due to Freescale documentation calling openpic irq 16 internal interrupt 0, that we should be clear exactly what it means. We also need to document what irqchip means, if we want to reserve the ability to use it in the future -- otherwise userspace could fill in any old junk there and we'd need to retain compatibility. It should probably be the fd of the MPIC. It can't, because irqchip is an index into an array. We'd have to add another layer of indirection if we want to find the fd to a certain irqchip. That's why I simply restricted it to always be 0 now. OK, didn't know how firmly that was baked into the code. Maybe a device attribute for returning the irqchip number -- which would accommodate devices that expose multiple irqchips. It looks like you only support having one type of irqchip built into the kernel at a time? That may be OK for now given the existing restrictions for building KVM, but it should be noted as a limitation/TODO. I don't think it's really hard to add support for another irqchip type.
But we certainly have harder issues to tackle first before we end up in a situation where in-kernel MPIC and in-kernel XICS would make sense in the same kernel. Not necessarily hard or hugely important, just looked odd putting global non-MPIC-specific functions in the MPIC file. I tend to prefer getting the component separation (at least reasonably) correct from the start, so the person who would end up needing to refactor to fit their device in doesn't need to mess with MPIC code to do so. Such a person might be unfamiliar with MPIC, have no easy way to test, etc. Similar to the current situation with the IRQ routing code, at least before you took it over. :-) -Scott -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
On 19.04.2013, at 02:50, Scott Wood wrote: On 04/18/2013 07:15:46 PM, Alexander Graf wrote: On 18.04.2013, at 23:39, Scott Wood wrote: On 04/18/2013 09:11:55 AM, Alexander Graf wrote: +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm-arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} Couldn't we just set a non-irqchip-specific bool? Though eventually this shouldn't be needed at all -- instead the check would be does the requested irqchip fd exist and refer to something that exposes an irqchip interface? How would we get the irqchip id? I was thinking it would come from whatever operation you're trying to do, though I see that MSI routing doesn't specify an irqchip. :-P In that case I guess it would check for whether any MSI handler has been registered. I think you're over engineering here :). These are internal interfaces that whoever wants to implement a secondary irqchip can worry about. I merely wanted to make sure we don't block our road for multiple irqchips in the kernel-user interface from the beginning. Internal ones are a different story :). The rmb needs documentation. I don't see a corresponding wmb before writing to kvm-arch.mpic. I actually just copied it from the x86 code, wondering what the rmb() is supposed to do here. Do we need this at all? Well, we don't want to start using the irqchip before it's been fully initialized -- but if we want to use barriers to accomplish that rather than a lock, there needs to be a wmb on the writing side. How would one create a route for IPIs, timers, etc (we have had customers wanting to assign those to KVM guests)? What about error interrupts on MPIC 4.2? How are you defining the pin field for MPIC? Shouldn't pin is basically what a source line is on the MPIC. That's what the equivalent of an IOAPIC interrupt line would be for the MPIC world. IPIs, timer interrupts and error interrupts are special vectors. 
We could probably model them as different KVM_IRQ_ROUTING_ types if we ever need to support mapping those to irqfd. Seems a bit heavyweight to add several new MPIC-specific routing types -- maybe just one new KVM_IRQ_ROUTING type that lets multiple words be used to describe the interrupt? Well, we can add a single KVM_IRQ_ROUTING_MPIC type if that's better. But we don't have to do it now. I dislike code that we can't test, and I don't have a good test case for user space injected IPIs right now :). For simple injection we can always do an ioctl on the MPIC device. I got complaints for that originally. :-) There are 2 reasons why direct ioctls on the MPIC device could be bad 1) irqfd 2) code sharing I'm not convinced yet we care about performance for MPIC IPI, TMR or ERR interrupts. So irqfd is out of the question there. Code sharing only makes sense in areas where things are common. In case of the MPIC, this is for SRC interrupts. So I don't think there should be complaints here :). However, I'd be inclined to say that it's rather unlikely you'd want to have a vfio device's interrupt line hooked up to the IPI interrupt of your guest... Well, as I said, we've done it before for a customer (not with VFIO of course, but our old internal-tree device assignment mechanism), so not *that* unlikely. The host was a two-core chip running Linux on only one of the cores, and a custom OS on the other core. They wanted to communicate between the custom OS and a KVM guest. Since host Linux was only running on one core, it didn't need the IPIs for itself (and beginning with e500mc, the host uses msgsnd instead, so there also the host will not need the IPIs for itself). They could just as well use a guest SRC line for that, no? What the listener on the host connects to on the host's MPIC is pretty much orthogonal to what we inject into the guest. there be an API documentation update for this? Hrm. I can certainly add one. OK. 
We've had enough confusion from poor documentation in the device tree binding, due to Freescale documentation calling openpic irq 16 internal interrupt 0, that we should be clear exactly what it means. We also need to document what irqchip means, if we want to reserve the ability to use it in the future -- otherwise userspace could fill in any old junk there and we'd need to retain compatibility. It should probably be the fd of the MPIC. It can't, because irqchip is an index into an array. We'd have to add another layer of indirection if we want to find the fd to a certain irqchip. That's why I simply restricted it to always be 0 now. OK, didn't know how firmly that was baked into the code. Maybe a device attribute for returning the irqchip number -- which would accommodate devices that expose multiple irqchips. That would work, yes. We'd have to have 2 mappings available irqchip number
Re: [PATCH 15/17] KVM: PPC: Support irq routing and irqfd for in-kernel MPIC
On 04/18/2013 08:09:23 PM, Alexander Graf wrote: On 19.04.2013, at 02:50, Scott Wood wrote: On 04/18/2013 07:15:46 PM, Alexander Graf wrote: On 18.04.2013, at 23:39, Scott Wood wrote: On 04/18/2013 09:11:55 AM, Alexander Graf wrote: +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + int ret = 0; + +#ifdef CONFIG_KVM_MPIC + ret = ret || (kvm-arch.mpic != NULL); +#endif + smp_rmb(); + return ret; +} Couldn't we just set a non-irqchip-specific bool? Though eventually this shouldn't be needed at all -- instead the check would be does the requested irqchip fd exist and refer to something that exposes an irqchip interface? How would we get the irqchip id? I was thinking it would come from whatever operation you're trying to do, though I see that MSI routing doesn't specify an irqchip. :-P In that case I guess it would check for whether any MSI handler has been registered. I think you're over engineering here :). These are internal interfaces that whoever wants to implement a secondary irqchip can worry about. I merely wanted to make sure we don't block our road for multiple irqchips in the kernel-user interface from the beginning. Internal ones are a different story :). Well, I did say eventually. My more immediate reaction was to seeing MPIC in a place it doesn't need to be, hence the question about a simple bool. Seems a bit heavyweight to add several new MPIC-specific routing types -- maybe just one new KVM_IRQ_ROUTING type that lets multiple words be used to describe the interrupt? Well, we can add a single KVM_IRQ_ROUTING_MPIC type if that's better. But we don't have to do it now. I dislike code that we can't test, and I don't have a good test case for user space injected IPIs right now :). Sure, it doesn't need to be now, as long as it's clear we have the extensibility to do it if/when the need arises. It likely will arise at least for error interrupts. I'd rather see it not have MPIC in the name, though. 
It's not MPIC-specific, just a new version of the struct with additional words for pin. For simple injection we can always do an ioctl on the MPIC device. I got complaints for that originally. :-) There are 2 reasons why direct ioctls on the MPIC device could be bad: 1) irqfd 2) code sharing. I'm not convinced yet we care about performance for MPIC IPI, TMR or ERR interrupts. So irqfd is out of the question there. It's out of the question because you're not yet convinced? :-) I think I'm done being surprised by what users try to do (though they may prove me wrong...). A different user complained about our non-KVM hypervisor because we didn't give guests direct access to the MPIC timers and IPIs, even though we provided adequate hypervisor-emulated mechanisms -- their application wanted to use hundreds of thousands of timers per second. They would not listen when we suggested that perhaps they could rearchitect things to not be as reliant on timers. We ended up implementing direct MPIC timer and IPI access there as well. As for error interrupts, no, they're not performance-critical, but we still probably want to use irqfd if we're using it for everything else. Even if the code for userspace reflection is already there, do you really want errors to be the one thing using a special IRQ path that's rarely tested? Beyond the extent to which it's necessary because of the specialness of the error interrupts in the hardware design. Code sharing only makes sense in areas where things are common. In case of the MPIC, this is for SRC interrupts. So I don't think there should be complaints here :). If you ignore irqfd (which was covered in #1), there isn't much that is really common even for SRC interrupts (and what is common is artificially so), and I made that point, but still got complaints. I was the one to point out that irqfd was the real reason to need to use the routing interface (and thus KVM_IRQ_LINE); they were more hung up on "why are you being different?".
However, I'd be inclined to say that it's rather unlikely you'd want to have a vfio device's interrupt line hooked up to the IPI interrupt of your guest... Well, as I said, we've done it before for a customer (not with VFIO of course, but our old internal-tree device assignment mechanism), so not *that* unlikely. The host was a two-core chip running Linux on only one of the cores, and a custom OS on the other core. They wanted to communicate between the custom OS and a KVM guest. Since host Linux was only running on one core, it didn't need the IPIs for itself (and beginning with e500mc, the host uses msgsnd instead, so there also the host will not need the IPIs for itself). They could just as well use a guest SRC line for that, no? What the listener on the host connects to on the host's MPIC is pretty much orthogonal to what we
[PATCH] KVM/PPC: emulate ehpriv
We can provide this emulation to simplify more extension later. Signed-off-by: Tiejun Chen tiejun.c...@windriver.com --- arch/powerpc/include/asm/disassemble.h |4 arch/powerpc/kvm/e500_emulate.c| 17 + 2 files changed, 21 insertions(+) diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 9b198d1..856f8de 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -77,4 +77,8 @@ static inline unsigned int get_d(u32 inst) return inst 0x; } +static inline unsigned int get_oc(u32 inst) +{ + return (inst 11) 0x7fff; +} #endif /* __ASM_PPC_DISASSEMBLE_H__ */ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index e78f353..36492cf 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -26,6 +26,7 @@ #define XOP_TLBRE 946 #define XOP_TLBWE 978 #define XOP_TLBILX 18 +#define XOP_EHPRIV 270 #ifdef CONFIG_KVM_E500MC static int dbell2prio(ulong param) @@ -80,6 +81,18 @@ static int kvmppc_e500_emul_msgsnd(struct kvm_vcpu *vcpu, int rb) return EMULATE_DONE; } + +static int kvmppc_e500_emul_ehpriv(struct kvm_run *run, struct kvm_vcpu *vcpu, + unsigned int inst) +{ + int emulated = EMULATE_DONE; + + switch (get_oc(inst)) { + default: + emulated = EMULATE_FAIL; + } + return emulated; +} #endif int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, @@ -130,6 +143,10 @@ int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_EHPRIV: + emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst); + break; + default: emulated = EMULATE_FAIL; } -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] KVM: PPC: Book3S HV: Make HPT reading code notice R/C bit changes
At present, the code that determines whether a HPT entry has changed, and thus needs to be sent to userspace when it is copying the HPT, doesn't consider a hardware update to the reference and change bits (R and C) in the HPT entries to constitute a change that needs to be sent to userspace. This adds code to check for changes in R and C when we are scanning the HPT to find changed entries, and adds code to set the changed flag for the HPTE when we update the R and C bits in the guest view of the HPTE. Since we now need to set the HPTE changed flag in book3s_64_mmu_hv.c as well as book3s_hv_rm_mmu.c, we move the note_hpte_modification() function into kvm_book3s_64.h. Current Linux guest kernels don't use the hardware updates of R and C in the HPT, so this change won't affect them. Linux (or other) kernels might in future want to use the R and C bits and have them correctly transferred across when a guest is migrated, so it is better to correct this deficiency. Signed-off-by: Paul Mackerras pau...@samba.org --- arch/powerpc/include/asm/kvm_book3s_64.h | 13 +++ arch/powerpc/kvm/book3s_64_mmu_hv.c | 59 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 11 -- 3 files changed, 63 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 38bec1d..9c1ff33 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -268,4 +268,17 @@ static inline int is_vrma_hpte(unsigned long hpte_v) (HPTE_V_1TB_SEG | (VRMA_VSID (40 - 16))); } +#ifdef CONFIG_KVM_BOOK3S_64_HV +/* + * Note modification of an HPTE; set the HPTE modified bit + * if anyone is interested. 
+ */ +static inline void note_hpte_modification(struct kvm *kvm, + struct revmap_entry *rev) +{ + if (atomic_read(kvm-arch.hpte_mod_interest)) + rev-guest_rpte |= HPTE_GR_MODIFIED; +} +#endif /* CONFIG_KVM_BOOK3S_64_HV */ + #endif /* __ASM_KVM_BOOK3S_64_H__ */ diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 8cc18ab..d641a66 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -893,7 +893,10 @@ static int kvm_unmap_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Harvest R and C */ rcbits = hptep[1] (HPTE_R_R | HPTE_R_C); *rmapp |= rcbits KVMPPC_RMAP_RC_SHIFT; - rev[i].guest_rpte = ptel | rcbits; + if (rcbits ~rev[i].guest_rpte) { + rev[i].guest_rpte = ptel | rcbits; + note_hpte_modification(kvm, rev[i]); + } } unlock_rmap(rmapp); hptep[0] = ~HPTE_V_HVLOCK; @@ -976,7 +979,10 @@ static int kvm_age_rmapp(struct kvm *kvm, unsigned long *rmapp, /* Now check and modify the HPTE */ if ((hptep[0] HPTE_V_VALID) (hptep[1] HPTE_R_R)) { kvmppc_clear_ref_hpte(kvm, hptep, i); - rev[i].guest_rpte |= HPTE_R_R; + if (!(rev[i].guest_rpte HPTE_R_R)) { + rev[i].guest_rpte |= HPTE_R_R; + note_hpte_modification(kvm, rev[i]); + } ret = 1; } hptep[0] = ~HPTE_V_HVLOCK; @@ -1080,7 +1086,10 @@ static int kvm_test_clear_dirty(struct kvm *kvm, unsigned long *rmapp) hptep[1] = ~HPTE_R_C; eieio(); hptep[0] = (hptep[0] ~HPTE_V_ABSENT) | HPTE_V_VALID; - rev[i].guest_rpte |= HPTE_R_C; + if (!(rev[i].guest_rpte HPTE_R_C)) { + rev[i].guest_rpte |= HPTE_R_C; + note_hpte_modification(kvm, rev[i]); + } ret = 1; } hptep[0] = ~HPTE_V_HVLOCK; @@ -1193,16 +1202,36 @@ struct kvm_htab_ctx { #define HPTE_SIZE (2 * sizeof(unsigned long)) +/* + * Returns 1 if this HPT entry has been modified or has pending + * R/C bit changes. 
+ */ +static int hpte_dirty(struct revmap_entry *revp, unsigned long *hptp) +{ + unsigned long rcbits_unset; + + if (revp-guest_rpte HPTE_GR_MODIFIED) + return 1; + + /* Also need to consider changes in reference and changed bits */ + rcbits_unset = ~revp-guest_rpte (HPTE_R_R | HPTE_R_C); + if ((hptp[0] HPTE_V_VALID) (hptp[1] rcbits_unset)) + return 1; + + return 0; +} + static long record_hpte(unsigned long flags, unsigned long *hptp, unsigned long *hpte, struct revmap_entry *revp, int