[PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer
Scott Wood pointed out that We are no longer using SPRG1 for vcpu pointer, but using SPRN_SPRG_THREAD = SPRG3 (thread-vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (do not see any need as of now), and if we want to support this in future then we have to shift to using SPRG1 for VCPU pointer. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/include/asm/reg.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1f34ef7..d46d92b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -945,9 +945,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer
On 25.07.14 08:02, Bharat Bhushan wrote: Scott Wood pointed out that We are no longer using SPRG1 for vcpu pointer, but using SPRN_SPRG_THREAD = SPRG3 (thread-vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (do not see any need as of now), and if we want to support this in future then we have to shift to using SPRG1 for VCPU pointer. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com Thanks, applied to kvm-ppc-queue. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v9 1/4] arm: add ARMv7 HYP API to flush VM TLBs, change generic TLB flush to support arch flush
On 25.07.14 02:56, Mario Smarduch wrote: Patch adds HYP interface for global VM TLB invalidation without address parameter. Generic VM TLB flush calls ARMv7 arch defined TLB flush function. Signed-off-by: Mario Smarduch m.smard...@samsung.com --- arch/arm/include/asm/kvm_asm.h |1 + arch/arm/include/asm/kvm_host.h |1 + arch/arm/kvm/Kconfig|1 + arch/arm/kvm/interrupts.S | 12 arch/arm/kvm/mmu.c | 17 + virt/kvm/Kconfig|3 +++ virt/kvm/kvm_main.c |4 7 files changed, 39 insertions(+) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 53b3c4a..21bc519 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -78,6 +78,7 @@ extern char __kvm_hyp_code_end[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); #endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 193ceaf..042206f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,5 +230,6 @@ int kvm_perf_teardown(void); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +void kvm_arch_flush_remote_tlbs(struct kvm *); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd29..44d3b6f 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO + select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_ARM_HOST depends on ARM_VIRT_EXT ARM_LPAE ---help--- diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 0d68d40..1258d46 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -66,6 +66,18 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) bx lr ENDPROC(__kvm_tlb_flush_vmid_ipa) +/** + * void 
__kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs + * + * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address + * parameter + */ + +ENTRY(__kvm_tlb_flush_vmid) + b __kvm_tlb_flush_vmid_ipa +ENDPROC(__kvm_tlb_flush_vmid) + + / * Flush TLBs and instruction caches of all CPUs inside the inner-shareable * domain, for all VMIDs diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2ac9588..35254c6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -56,6 +56,23 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } +#ifdef CONFIG_ARM Why the ifdef? We're in ARM code here, no? +/** + * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries without address + * parameter. In HYP mode reuses __kvm_tlb_flush_vmid_ipa() function used by + * kvm_tlb_flush_vmid_ipa(). + */ +void kvm_arch_flush_remote_tlbs(struct kvm *kvm) +{ + if (kvm) + kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); I don't see why we should ever call this function with kvm==NULL. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v9 2/4] arm: ARMv7 dirty page logging initial mem region write protect (w/no huge PUD support)
On 25.07.14 02:56, Mario Smarduch wrote: Patch adds support for initial write protection VM memlsot. This patch series assumes that huge PUDs will not be used in 2nd stage tables. Is this a valid assumption? Signed-off-by: Mario Smarduch m.smard...@samsung.com --- arch/arm/include/asm/kvm_host.h |1 + arch/arm/include/asm/kvm_mmu.h| 20 ++ arch/arm/include/asm/pgtable-3level.h |1 + arch/arm/kvm/arm.c|9 +++ arch/arm/kvm/mmu.c| 128 + 5 files changed, 159 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 042206f..6521a2d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); void kvm_arch_flush_remote_tlbs(struct kvm *); +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..08ab5e8 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +static inline void kvm_set_s2pte_readonly(pte_t *pte) +{ + pte_val(*pte) = (pte_val(*pte) ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; +} + +static inline bool kvm_s2pte_readonly(pte_t *pte) +{ + return (pte_val(*pte) L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; +} + +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +{ + pmd_val(*pmd) = (pmd_val(*pmd) ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; +} + +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +{ + return (pmd_val(*pmd) L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; +} + /* Open coded p*d_addr_end that can deal with 64bit addresses */ #define kvm_pgd_addr_end(addr, end) \ ({u64 __boundary = ((addr) + PGDIR_SIZE) PGDIR_MASK;\ diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h index 85c60ad..d8bb40b 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -129,6 +129,7 @@ #define L_PTE_S2_RDONLY (_AT(pteval_t, 1) 6) /* HAP[1] */ #define L_PTE_S2_RDWR (_AT(pteval_t, 3) 6) /* HAP[2:1] */ +#define L_PMD_S2_RDONLY (_AT(pteval_t, 1) 6) /* HAP[1] */ #define L_PMD_S2_RDWR (_AT(pmdval_t, 3) 6) /* HAP[2:1] */ /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..e11c2dd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { +#ifdef CONFIG_ARM Same question on CONFIG_ARM here. Is this the define used to distinguish between 32bit and 64bit? Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] watchdog: control hard lockup detection default
- Original Message - From: Andrew Jones drjo...@redhat.com To: linux-ker...@vger.kernel.org, kvm@vger.kernel.org Cc: uober...@redhat.com, dzic...@redhat.com, pbonz...@redhat.com, a...@linux-foundation.org, mi...@redhat.com Sent: Thursday, July 24, 2014 12:13:30 PM Subject: [PATCH 2/3] watchdog: control hard lockup detection default [...] The running kernel still has the ability to enable/disable at any time with /proc/sys/kernel/nmi_watchdog us usual. However even when the default has been overridden /proc/sys/kernel/nmi_watchdog will initially show '1'. To truly turn it on one must disable/enable it, i.e. echo 0 /proc/sys/kernel/nmi_watchdog echo 1 /proc/sys/kernel/nmi_watchdog [...] @@ -626,15 +665,17 @@ int proc_dowatchdog(struct ctl_table *table, int write, * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ -if (watchdog_user_enabled watchdog_thresh) +if (watchdog_user_enabled watchdog_thresh) { +watchdog_enable_hardlockup_detector(true); err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); -else +} else [...] I just realized a possible issue in the above part of the patch: If we would want to give the user the option to override the effect of patch 3/3 via /proc, I think proc_dowatchdog() should enable hard lockup detection _only_ in case of a state transition from 'NOT watchdog_running' to 'watchdog_running'. | if (watchdog_user_enabled watchdog_thresh) {| need to add this if (!watchdog_running) ---' watchdog_enable_hardlockup_detector(true); err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); } else ... The additional 'if (!watchdog_running)' would _require_ the user to perform the sequence of commands echo 0 /proc/sys/kernel/nmi_watchdog echo 1 /proc/sys/kernel/nmi_watchdog to enable hard lockup detection explicitly. 
I think changing the 'watchdog_thresh' while 'watchdog_running' is true should _not_ enable hard lockup detection as a side-effect, because a user may have a 'sysctl.conf' entry such as kernel.watchdog_thresh = ... or may only want to change the 'watchdog_thresh' on the fly. I think the following flow of execution could cause such undesired side-effect. proc_dowatchdog if (watchdog_user_enabled watchdog_thresh) { watchdog_enable_hardlockup_detector hardlockup_detector_enabled = true watchdog_enable_all_cpus if (!watchdog_running) { ... } else if (sample_period_changed) update_timers_all_cpus for_each_online_cpu update_timers watchdog_nmi_disable ... watchdog_nmi_enable watchdog_hardlockup_detector_is_enabled return true enable perf counter for hard lockup detection Regards, Uli -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
Hi Peter, On Thu, Jul 24, 2014 at 09:05:39PM +0100, Peter Maydell wrote: On 24 July 2014 20:55, Will Deacon will.dea...@arm.com wrote: Again, that can be solved by introduced Marc's attr for determining the GICV offset within the 64k page. I don't think that's -stable material. Agreed that we don't want to put Marc's patchset in -stable (and that without it systems with GICV in their host devicetree at pagebase+60K are unusable, so we're not actually regressing anything if we put this into stable). But... I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, Whether the 64K page contains Bad Stuff is completely orthogonal to whether the device tree offset the host has for the GICV is 0K, 60K or anything in between. 
What you should be checking for is is this system design broken?, which is probably a device tree attribute. Actually, I think we can just claim that the GICV occupies the full 64k region and allow the offset within that region to be acquired via the new ioctl. Will -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 25 July 2014 10:31, Will Deacon will.dea...@arm.com wrote: On Thu, Jul 24, 2014 at 09:05:39PM +0100, Peter Maydell wrote: On 24 July 2014 20:55, Will Deacon will.dea...@arm.com wrote: Again, that can be solved by introduced Marc's attr for determining the GICV offset within the 64k page. I don't think that's -stable material. Agreed that we don't want to put Marc's patchset in -stable (and that without it systems with GICV in their host devicetree at pagebase+60K are unusable, so we're not actually regressing anything if we put this into stable). But... I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? Well, the apm-storm.dtsi specifies a 64K aligned GICV base but a size of 8K, so presumably it qualifies. (If the GICV size should really be 64K and that's an APM device tree bug then this patch is going to make KVM on 64K page hosts stop working on that board... somebody with access to the h/w docs for the APM boards needs to check that, I guess.) 
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, Yes, I think if we check both start and size are page aligned then this will both: (a) avoid using the VGIC on systems where it's going to allow the guest to take down the machine (b) avoid using the VGIC if it's not at a page boundary, pending Marc's patchset landing and making that case actually work rather than do weird things. thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] watchdog: control hard lockup detection default
On Fri, Jul 25, 2014 at 04:32:55AM -0400, Ulrich Obergfell wrote: - Original Message - From: Andrew Jones drjo...@redhat.com To: linux-ker...@vger.kernel.org, kvm@vger.kernel.org Cc: uober...@redhat.com, dzic...@redhat.com, pbonz...@redhat.com, a...@linux-foundation.org, mi...@redhat.com Sent: Thursday, July 24, 2014 12:13:30 PM Subject: [PATCH 2/3] watchdog: control hard lockup detection default [...] The running kernel still has the ability to enable/disable at any time with /proc/sys/kernel/nmi_watchdog us usual. However even when the default has been overridden /proc/sys/kernel/nmi_watchdog will initially show '1'. To truly turn it on one must disable/enable it, i.e. echo 0 /proc/sys/kernel/nmi_watchdog echo 1 /proc/sys/kernel/nmi_watchdog [...] @@ -626,15 +665,17 @@ int proc_dowatchdog(struct ctl_table *table, int write, * disabled. The 'watchdog_running' variable check in * watchdog_*_all_cpus() function takes care of this. */ -if (watchdog_user_enabled watchdog_thresh) +if (watchdog_user_enabled watchdog_thresh) { +watchdog_enable_hardlockup_detector(true); err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); -else +} else [...] I just realized a possible issue in the above part of the patch: If we would want to give the user the option to override the effect of patch 3/3 via /proc, I think proc_dowatchdog() should enable hard lockup detection _only_ in case of a state transition from 'NOT watchdog_running' to 'watchdog_running'. | if (watchdog_user_enabled watchdog_thresh) {| need to add this if (!watchdog_running) ---' watchdog_enable_hardlockup_detector(true); err = watchdog_enable_all_cpus(old_thresh != watchdog_thresh); } else ... The additional 'if (!watchdog_running)' would _require_ the user to perform the sequence of commands echo 0 /proc/sys/kernel/nmi_watchdog echo 1 /proc/sys/kernel/nmi_watchdog to enable hard lockup detection explicitly. 
I think changing the 'watchdog_thresh' while 'watchdog_running' is true should _not_ enable hard lockup detection as a side-effect, because a user may have a 'sysctl.conf' entry such as kernel.watchdog_thresh = ... or may only want to change the 'watchdog_thresh' on the fly. I think the following flow of execution could cause such undesired side-effect. proc_dowatchdog if (watchdog_user_enabled watchdog_thresh) { watchdog_enable_hardlockup_detector hardlockup_detector_enabled = true watchdog_enable_all_cpus if (!watchdog_running) { ... } else if (sample_period_changed) update_timers_all_cpus for_each_online_cpu update_timers watchdog_nmi_disable ... watchdog_nmi_enable watchdog_hardlockup_detector_is_enabled return true enable perf counter for hard lockup detection Regards, Uli Nice catch. Looks like this will need a v2. Paolo, do we have a consensus on the proc echoing? Or should that be revisited in the v2 as well? drew -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] kvm: Resolve missing-field-initializers warnings
From: Mark Rustad mark.d.rus...@intel.com Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a54..a228ee8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ out: #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] x86/kvm: Resolve shadow warning from min macro
From: Mark Rustad mark.d.rus...@intel.com Resolve a shadow warning generated in W=2 builds by the nested use of the min macro by instead using the min3 macro for the minimum of 3 values. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/emulate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4e833d..2d6c97a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1315,8 +1315,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, in_page = (ctxt-eflags EFLG_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); - n = min(min(in_page, (unsigned int)sizeof(rc-data)) / size, - count); + n = min3(in_page, (unsigned int)sizeof(rc-data) / size, count); if (n == 0) n = 1; rc-pos = rc-end = 0; -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] x86/kvm: Resolve some missing-initializers warnings
From: Mark Rustad mark.d.rus...@intel.com Resolve some missing-initializers warnings that appear in W=2 builds. They are resolved by using a designated initialization, which is enough to resolve the warning. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef432f8..6d0f47208 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -82,8 +82,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x) .offset = offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) .offset = offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] x86/kvm: Resolve shadow warnings in macro expansion
From: Mark Rustad mark.d.rus...@intel.com Resolve shadow warnings that appear in W=2 builds. In this case, a macro declared an inner local variable with the same name as an outer one. This can be a serious hazard in the event that the outer variable is ever passed as a parameter, as the inner variable will be referenced instead of what was intended. This macro doesn't have any parameters - at this time - but prepend an _ to all of the macro-declared variables as is the custom, to resolve the warnings and eliminate any future hazard. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/mmutrace.h | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ff..2d8d00c 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,26 +22,26 @@ __entry-unsync = sp-unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const char *ret = p-buffer + p-len; \ - static const char *access_str[] = { \ + const char *_ret = p-buffer + p-len; \ + static const char *_access_str[] = {\ ---, --x, w--, w-x, -u-, -ux, wu-, wux \ }; \ - union kvm_mmu_page_role role; \ + union kvm_mmu_page_role _role; \ \ - role.word = __entry-role; \ + _role.word = __entry-role; \ \ trace_seq_printf(p, sp gen %lx gfn %llx %u%s q%u%s %s%s \ %snxe root %u %s%c, __entry-mmu_valid_gen, \ -__entry-gfn, role.level, \ -role.cr4_pae ? pae : ,\ -role.quadrant, \ -role.direct ? direct : , \ -access_str[role.access], \ -role.invalid ? invalid : ,\ -role.nxe ? : !, \ +__entry-gfn, _role.level, \ +_role.cr4_pae ? pae : , \ +_role.quadrant,\ +_role.direct ? direct : , \ +_access_str[_role.access], \ +_role.invalid ? invalid : , \ +_role.nxe ? : !, \ __entry-root_count, \ __entry-unsync ? 
unsync : sync, 0); \ - ret;\ + _ret; \ }) #define kvm_mmu_trace_pferr_flags \ -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/4] x86/kvm: Resolve some missing-initializers warnings
Il 25/07/2014 15:27, Jeff Kirsher ha scritto: From: Mark Rustad mark.d.rus...@intel.com Resolve some missing-initializers warnings that appear in W=2 builds. They are resolved by using a designated initialization, which is enough to resolve the warning. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ef432f8..6d0f47208 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -82,8 +82,8 @@ u64 __read_mostly efer_reserved_bits = ~((u64)(EFER_SCE | EFER_LME | EFER_LMA)); static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); #endif -#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM -#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU +#define VM_STAT(x) .offset = offsetof(struct kvm, stat.x), KVM_STAT_VM +#define VCPU_STAT(x) .offset = offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU static void update_cr8_intercept(struct kvm_vcpu *vcpu); static void process_nmi(struct kvm_vcpu *vcpu); This is really ugly. Either add the missing initializer (it's just a NULL) or, better, move the name into VM_STAT/VCPU_STAT and add the missing initializer to VM_STAT/VCPU_STAT. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] x86/kvm: Resolve shadow warnings in macro expansion
Il 25/07/2014 15:27, Jeff Kirsher ha scritto: From: Mark Rustad mark.d.rus...@intel.com Resolve shadow warnings that appear in W=2 builds. In this case, a macro declared an inner local variable with the same name as an outer one. This can be a serious hazard in the event that the outer variable is ever passed as a parameter, as the inner variable will be referenced instead of what was intended. This macro doesn't have any parameters - at this time - but prepend an _ to all of the macro-declared variables as is the custom, to resolve the warnings and eliminate any future hazard. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/mmutrace.h | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ff..2d8d00c 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,26 +22,26 @@ __entry-unsync = sp-unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ - const char *ret = p-buffer + p-len; \ - static const char *access_str[] = { \ + const char *_ret = p-buffer + p-len; \ + static const char *_access_str[] = {\ ---, --x, w--, w-x, -u-, -ux, wu-, wux \ }; \ - union kvm_mmu_page_role role; \ + union kvm_mmu_page_role _role; \ \ - role.word = __entry-role; \ + _role.word = __entry-role; \ \ trace_seq_printf(p, sp gen %lx gfn %llx %u%s q%u%s %s%s \ %snxe root %u %s%c, __entry-mmu_valid_gen, \ - __entry-gfn, role.level, \ - role.cr4_pae ? pae : ,\ - role.quadrant, \ - role.direct ? direct : , \ - access_str[role.access], \ - role.invalid ? invalid : ,\ - role.nxe ? : !, \ + __entry-gfn, _role.level, \ + _role.cr4_pae ? pae : , \ + _role.quadrant,\ + _role.direct ? direct : , \ + _access_str[_role.access], \ + _role.invalid ? invalid : , \ + _role.nxe ? : !, \ __entry-root_count, \ __entry-unsync ? 
unsync : sync, 0); \ - ret;\ + _ret; \ }) #define kvm_mmu_trace_pferr_flags \ I think this unnecessarily uglifies the code, so I am not applying it. Gleb, what do you think? Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/4] kvm: Resolve missing-field-initializers warnings
Il 25/07/2014 15:27, Jeff Kirsher ha scritto: From: Mark Rustad mark.d.rus...@intel.com Resolve missing-field-initializers warnings seen in W=2 kernel builds by having macros generate more elaborated initializers. That is enough to silence the warnings. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- virt/kvm/irq_comm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/virt/kvm/irq_comm.c b/virt/kvm/irq_comm.c index ced4a54..a228ee8 100644 --- a/virt/kvm/irq_comm.c +++ b/virt/kvm/irq_comm.c @@ -323,13 +323,13 @@ out: #define IOAPIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = KVM_IRQCHIP_IOAPIC, .u.irqchip.pin = (irq) } + .u.irqchip = { .irqchip = KVM_IRQCHIP_IOAPIC, .pin = (irq) } } #define ROUTING_ENTRY1(irq) IOAPIC_ROUTING_ENTRY(irq) #ifdef CONFIG_X86 # define PIC_ROUTING_ENTRY(irq) \ { .gsi = irq, .type = KVM_IRQ_ROUTING_IRQCHIP, \ - .u.irqchip.irqchip = SELECT_PIC(irq), .u.irqchip.pin = (irq) % 8 } + .u.irqchip = { .irqchip = SELECT_PIC(irq), .pin = (irq) % 8 } } # define ROUTING_ENTRY2(irq) \ IOAPIC_ROUTING_ENTRY(irq), PIC_ROUTING_ENTRY(irq) #else Applied. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/4] x86/kvm: Resolve shadow warning from min macro
Il 25/07/2014 15:27, Jeff Kirsher ha scritto: From: Mark Rustad mark.d.rus...@intel.com Resolve a shadow warning generated in W=2 builds by the nested use of the min macro by instead using the min3 macro for the minimum of 3 values. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/emulate.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e4e833d..2d6c97a 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1315,8 +1315,7 @@ static int pio_in_emulated(struct x86_emulate_ctxt *ctxt, in_page = (ctxt-eflags EFLG_DF) ? offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)) : PAGE_SIZE - offset_in_page(reg_read(ctxt, VCPU_REGS_RDI)); - n = min(min(in_page, (unsigned int)sizeof(rc-data)) / size, - count); + n = min3(in_page, (unsigned int)sizeof(rc-data) / size, count); if (n == 0) n = 1; rc-pos = rc-end = 0; Applied. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 25 July 2014 15:02, Joel Schopp joel.sch...@amd.com wrote: --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; That has a non-aligned GICV -- does it really work on a mainline kernel without Marc's patches to cope with GICV which aren't at the base of a page? I can't see how... thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
Hi Joel, On Fri, Jul 25, 2014 at 03:02:58PM +0100, Joel Schopp wrote: I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. 
gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. Will -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 09:08 AM, Will Deacon wrote: Hi Joel, On Fri, Jul 25, 2014 at 03:02:58PM +0100, Joel Schopp wrote: I can't think of any way of determining whether a particular system gets this right or wrong automatically, which suggests perhaps we need to allow the device tree to specify that the GICV is 64k-page-safe... When we support such systems, I also think we'll need a device-tree change. My main concern right now is stopping the ability to hose the entire machine by trying to instantiate a virtual GIC. ...I don't see how your patch prevents instantiating a VGIC and hosing the machine on a system where the 64K with the GICV registers in it goes [GICV registers] [machine blows up if you read this] 0K 8K 64K True, if such a machine existed, then this patch wouldn't detect it. I don't think we support anything like that in mainline at the moment, but the following additional diff should solve the problem, no? diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index fa9a95b3ed19..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1539,6 +1539,14 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; kvm_info(%s@%llx IRQ%d\n, vgic_node-name, This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. 
gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. The problem is that by the spec the size is 0x2000 and was never properly rearchitected from 4K to variable page size support. The workaround is that each of the 4K in the 64K page (16 of them) are really mapped to the same location in hardware. So the contents of 0xe116 is the same as the contents of 0xe116f000. See “Appendix F: GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base System Architecture_ specification. Now if we were to change the entry to 0x0 0xe116 0 0x2000 it would be obviously broken because the second half would map to a duplicate of the first half. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On Fri, Jul 25, 2014 at 03:16:15PM +0100, Joel Schopp wrote: On 07/25/2014 09:08 AM, Will Deacon wrote: This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. The problem is that by the spec the size is 0x2000 and was never properly rearchitected from 4K to variable page size support. The workaround is that each of the 4K in the 64K page (16 of them) are really mapped to the same location in hardware. So the contents of 0xe116 is the same as the contents of 0xe116f000. See “Appendix F: GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base System Architecture_ specification. I've read that document, but it's not mandated and I don't think I have a single piece of hardware that actually follows it. Even the CPUs don't seem to perform the aliasing suggesting there (take a look at the A57 and A53 TRMs -- there are reserved regions in there). Now if we were to change the entry to 0x0 0xe116 0 0x2000 it would be obviously broken because the second half would map to a duplicate of the first half. I think you'd need something like 0x0 0xe116 0 0x2 and we'd have to change the GIC driver to do the right thing. What we currently have is unsafe on most hardware, yet happens to work on your system. 
Will -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] x86: Resolve shadow warnings from apic_io.h
From: Mark Rustad mark.d.rus...@intel.com Change the name of formal parameters in some static inlines from apic, which shadows a global of the same name, to apicid. Also change the formal parameter name on some trace functions for the same reason. This eliminates many thousands of shadow warnings in my W=2 kernel build. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/include/asm/io_apic.h | 14 +- arch/x86/kernel/apic/io_apic.c | 211 ++-- arch/x86/kvm/lapic.c | 758 + arch/x86/kvm/lapic.h | 23 +- arch/x86/kvm/trace.h | 12 +- 5 files changed, 518 insertions(+), 500 deletions(-) diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 0aeed5c..d2205d6 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -211,18 +211,20 @@ extern int native_ioapic_set_affinity(struct irq_data *, const struct cpumask *, bool); -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +static inline unsigned int io_apic_read(unsigned int apicid, unsigned int reg) { - return x86_io_apic_ops.read(apic, reg); + return x86_io_apic_ops.read(apicid, reg); } -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +static inline void io_apic_write(unsigned int apicid, unsigned int reg, +unsigned int value) { - x86_io_apic_ops.write(apic, reg, value); + x86_io_apic_ops.write(apicid, reg, value); } -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +static inline void io_apic_modify(unsigned int apicid, unsigned int reg, + unsigned int value) { - x86_io_apic_ops.modify(apic, reg, value); + x86_io_apic_ops.modify(apicid, reg, value); } extern void io_apic_eoi(unsigned int apic, unsigned int vector); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a44dce8..03f9f46 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c 
@@ -335,22 +335,23 @@ static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) + (mpc_ioapic_addr(idx) ~PAGE_MASK); } -void io_apic_eoi(unsigned int apic, unsigned int vector) +void io_apic_eoi(unsigned int apicid, unsigned int vector) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic = io_apic_base(apicid); writel(vector, io_apic-eoi); } -unsigned int native_io_apic_read(unsigned int apic, unsigned int reg) +unsigned int native_io_apic_read(unsigned int apicid, unsigned int reg) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic = io_apic_base(apicid); writel(reg, io_apic-index); return readl(io_apic-data); } -void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +void native_io_apic_write(unsigned int apicid, unsigned int reg, + unsigned int value) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic = io_apic_base(apicid); writel(reg, io_apic-index); writel(value, io_apic-data); @@ -362,9 +363,10 @@ void native_io_apic_write(unsigned int apic, unsigned int reg, unsigned int valu * * Older SiS APIC requires we rewrite the index register */ -void native_io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +void native_io_apic_modify(unsigned int apicid, unsigned int reg, + unsigned int value) { - struct io_apic __iomem *io_apic = io_apic_base(apic); + struct io_apic __iomem *io_apic = io_apic_base(apicid); if (sis_apic_bug) writel(reg, io_apic-index); @@ -376,23 +378,23 @@ union entry_union { struct IO_APIC_route_entry entry; }; -static struct IO_APIC_route_entry __ioapic_read_entry(int apic, int pin) +static struct IO_APIC_route_entry __ioapic_read_entry(int apicid, int pin) { union entry_union eu; - eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); - eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); + eu.w1 = io_apic_read(apicid, 0x10 + 2 * pin); + eu.w2 = io_apic_read(apicid, 0x11 + 2 * 
pin); return eu.entry; } -static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) +static struct IO_APIC_route_entry ioapic_read_entry(int apicid, int pin) { union entry_union eu; unsigned long flags; raw_spin_lock_irqsave(ioapic_lock, flags); - eu.entry = __ioapic_read_entry(apic, pin); + eu.entry = __ioapic_read_entry(apicid, pin); raw_spin_unlock_irqrestore(ioapic_lock, flags);
Re: [PATCH] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 09:23 AM, Will Deacon wrote: On Fri, Jul 25, 2014 at 03:16:15PM +0100, Joel Schopp wrote: On 07/25/2014 09:08 AM, Will Deacon wrote: This would break with my SOC device tree which looks like this. Note this device tree works just fine without checks. gic: interrupt-controller@e1101000 { compatible = arm,gic-400-v2m; #interrupt-cells = 3; #address-cells = 0; interrupt-controller; msi-controller; reg = 0x0 0xe111 0 0x1000, /* gic dist */ 0x0 0xe112f000 0 0x2000, /* gic cpu */ 0x0 0xe114f000 0 0x2000, /* gic virtual ic*/ 0x0 0xe116f000 0 0x2000, /* gic virtual cpu*/ 0x0 0xe118 0 0x1000; /* gic msi */ interrupts = 1 8 0xf04; }; I appreciate it may work, but that's only because the kernel is actually using an alias of GICV at 0xe116 by accident. I would say that you're getting away with passing an incorrect description. The problem is that by the spec the size is 0x2000 and was never properly rearchitected from 4K to variable page size support. The workaround is that each of the 4K in the 64K page (16 of them) are really mapped to the same location in hardware. So the contents of 0xe116 is the same as the contents of 0xe116f000. See “Appendix F: GIC-400 and 64KB Translation Granule” in v2.1 of the ARM _Server Base System Architecture_ specification. I've read that document, but it's not mandated and I don't think I have a single piece of hardware that actually follows it. Even the CPUs don't seem to perform the aliasing suggesting there (take a look at the A57 and A53 TRMs -- there are reserved regions in there). Not mandated, but it is obviously highly encouraged, and at a minimum valid. The SOC sitting under my desk follows it. Now if we were to change the entry to 0x0 0xe116 0 0x2000 it would be obviously broken because the second half would map to a duplicate of the first half. I think you'd need something like 0x0 0xe116 0 0x2 and we'd have to change the GIC driver to do the right thing. 
What we currently have is unsafe on most hardware, yet happens to work on your system. If you change the GIC driver, kvm, kvmtool, and qemu to do the right thing I'd be happy to change the device tree entry to 0x0 0xe116 0 0x2 or any other value of your choosing. I really have no opinion here on how it should be done other than what is there now currently works and I'd like to have whatever we do in the future continue to work. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c02 in the above scenario. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Marc Zyngier marc.zyng...@arm.com Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: Joel Schopp joel.sch...@amd.com Cc: Don Dutile ddut...@redhat.com Acked-by: Peter Maydell peter.mayd...@linaro.org Signed-off-by: Will Deacon will.dea...@arm.com --- v1 -v2 : Added size alignment check and Peter's ack. Could this go in for 3.16 please? 
virt/kvm/arm/vgic.c | 24 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9bebb577..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1526,17 +1526,33 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } - kvm_info(%s@%llx IRQ%d\n, vgic_node-name, -vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, vcpu_res)) { kvm_err(Cannot obtain VCPU resource\n); ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err(GICV physical address 0x%llx not page aligned\n, + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + kvm_info(%s@%llx IRQ%d\n, vgic_node-name, +vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + goto out; out_unmap: -- 2.0.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 10:29 AM, Will Deacon wrote: If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. No denying this is a problem. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c02 in the above scenario. The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. This patch fixes the problem by failing the vgic probe if the physical base address or the size of GICV aren't page-aligned. Note that this generated a warning in dmesg about freeing enabled IRQs, so I had to move the IRQ enabling later in the probe. Cc: Christoffer Dall christoffer.d...@linaro.org Cc: Marc Zyngier marc.zyng...@arm.com Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: Joel Schopp joel.sch...@amd.com Cc: Don Dutile ddut...@redhat.com Acked-by: Peter Maydell peter.mayd...@linaro.org Signed-off-by: Will Deacon will.dea...@arm.com --- v1 -v2 : Added size alignment check and Peter's ack. 
Could this go in for 3.16 please? virt/kvm/arm/vgic.c | 24 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 56ff9bebb577..476d3bf540a8 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1526,17 +1526,33 @@ int kvm_vgic_hyp_init(void) goto out_unmap; } - kvm_info(%s@%llx IRQ%d\n, vgic_node-name, - vctrl_res.start, vgic_maint_irq); - on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); - if (of_address_to_resource(vgic_node, 3, vcpu_res)) { kvm_err(Cannot obtain VCPU resource\n); ret = -ENXIO; goto out_unmap; } + + if (!PAGE_ALIGNED(vcpu_res.start)) { + kvm_err(GICV physical address 0x%llx not page aligned\n, + (unsigned long long)vcpu_res.start); + ret = -ENXIO; + goto out_unmap; + } + + if (!PAGE_ALIGNED(resource_size(vcpu_res))) { + kvm_err(GICV size 0x%llx not a multiple of page size 0x%lx\n, + (unsigned long long)resource_size(vcpu_res), + PAGE_SIZE); + ret = -ENXIO; + goto out_unmap; + } + vgic_vcpu_base = vcpu_res.start; + kvm_info(%s@%llx IRQ%d\n, vgic_node-name, + vctrl_res.start, vgic_maint_irq); + on_each_cpu(vgic_init_maintenance_interrupt, NULL, 1); + goto out; out_unmap: -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 25 July 2014 16:56, Joel Schopp joel.sch...@amd.com wrote: The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. Your SOC currently works by fluke because the guest doesn't look at the last 4K of the GICC. If you're happy with it continuing to work by fluke you could make your device tree say it had a 64K GICV region with a 64K-aligned base. To make it work not by fluke but actually correctly requires Marc's patchset, at a minimum. thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On Fri, Jul 25, 2014 at 04:56:18PM +0100, Joel Schopp wrote: On 07/25/2014 10:29 AM, Will Deacon wrote: If the physical address of GICV isn't page-aligned, then we end up creating a stage-2 mapping of the page containing it, which causes us to map neighbouring memory locations directly into the guest. As an example, consider a platform with GICV at physical 0x2c02f000 running a 64k-page host kernel. If qemu maps this into the guest at 0x8001, then guest physical addresses 0x8001 - 0x8001efff will map host physical region 0x2c02 - 0x2c02efff. Accesses to these physical regions may cause UNPREDICTABLE behaviour, for example, on the Juno platform this will cause an SError exception to EL3, which brings down the entire physical CPU resulting in RCU stalls / HYP panics / host crashing / wasted weeks of debugging. No denying this is a problem. SBSA recommends that systems alias the 4k GICV across the bounding 64k region, in which case GICV physical could be described as 0x2c02 in the above scenario. The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. Please, help me with an alternative. If we drop the size alignment check, then we can miss some dangerous cases such as the one highlighted previously by Peter. My SOC which works fine with kvm now will go to not working with kvm after this patch. Right, but my only alternative is have CONFIG_KVM depends on !64K_PAGES, which sucks for everybody. Your device-tree entry has to change *anyway*, because as it stands we're mapping 60k of unknown stuff into the guest, which the kernel needs to know is safe. 
Will -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On 07/25/2014 11:02 AM, Peter Maydell wrote: On 25 July 2014 16:56, Joel Schopp joel.sch...@amd.com wrote: The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. Your SOC currently works by fluke because the guest doesn't look at the last 4K of the GICC. If you're happy with it continuing to work by fluke you could make your device tree say it had a 64K GICV region with a 64K-aligned base. To make it work not by fluke but actually correctly requires Marc's patchset, at a minimum. Since we aren't actually using the last 4K of the gicv at the moment I suppose I could drop my objections to this patch and change my device tree until Marc's patchset provides a proper solution for the gicv's second 4K that works for everybody. Acked-by: Joel Schopp joel.sch...@amd.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] kvm: arm64: vgic: fix hyp panic with 64k pages on juno platform
On Fri, Jul 25, 2014 at 05:24:18PM +0100, Joel Schopp wrote: On 07/25/2014 11:02 AM, Peter Maydell wrote: On 25 July 2014 16:56, Joel Schopp joel.sch...@amd.com wrote: The problem with this patch is the gicv is really 8K. The reason you would map at a 60K offset (0xf000), and why we do on our SOC, is so that the 8K gicv would pick up the last 4K from the first page and the first 4K from the next page. With your patch it is impossible to map all 8K of the gicv with 64K pages. My SOC which works fine with kvm now will go to not working with kvm after this patch. Your SOC currently works by fluke because the guest doesn't look at the last 4K of the GICC. If you're happy with it continuing to work by fluke you could make your device tree say it had a 64K GICV region with a 64K-aligned base. To make it work not by fluke but actually correctly requires Marc's patchset, at a minimum. Since we aren't actually using the last 4K of the gicv at the moment I supppose I could drop my objections to this patch and change my device tree until Marc's patchset provides a proper solution for the gicv's second 4K that works for everybody. Acked-by: Joel Schopp joel.sch...@amd.com Thanks, Joel. Will -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] x86/kvm: Resolve shadow warnings in macro expansion
On Jul 25, 2014, at 7:06 AM, Paolo Bonzini pbonz...@redhat.com wrote: Il 25/07/2014 15:27, Jeff Kirsher ha scritto: From: Mark Rustad mark.d.rus...@intel.com Resolve shadow warnings that appear in W=2 builds. In this case, a macro declared an inner local variable with the same name as an outer one. This can be a serious hazard in the event that the outer variable is ever passed as a parameter, as the inner variable will be referenced instead of what was intended. This macro doesn't have any parameters - at this time - but prepend an _ to all of the macro-declared variables as is the custom, to resolve the warnings and eliminate any future hazard. Signed-off-by: Mark Rustad mark.d.rus...@intel.com Signed-off-by: Jeff Kirsher jeffrey.t.kirs...@intel.com --- arch/x86/kvm/mmutrace.h | 24 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/arch/x86/kvm/mmutrace.h b/arch/x86/kvm/mmutrace.h index 9d2e0ff..2d8d00c 100644 --- a/arch/x86/kvm/mmutrace.h +++ b/arch/x86/kvm/mmutrace.h @@ -22,26 +22,26 @@ __entry-unsync = sp-unsync; #define KVM_MMU_PAGE_PRINTK() ({ \ -const char *ret = p-buffer + p-len; \ -static const char *access_str[] = { \ +const char *_ret = p-buffer + p-len; \ +static const char *_access_str[] = {\ ---, --x, w--, w-x, -u-, -ux, wu-, wux \ }; \ -union kvm_mmu_page_role role; \ +union kvm_mmu_page_role _role; \ \ -role.word = __entry-role; \ +_role.word = __entry-role; \ \ trace_seq_printf(p, sp gen %lx gfn %llx %u%s q%u%s %s%s \ %snxe root %u %s%c, __entry-mmu_valid_gen, \ - __entry-gfn, role.level, \ - role.cr4_pae ? pae : ,\ - role.quadrant, \ - role.direct ? direct : , \ - access_str[role.access], \ - role.invalid ? invalid : ,\ - role.nxe ? : !, \ + __entry-gfn, _role.level, \ + _role.cr4_pae ? pae : , \ + _role.quadrant,\ + _role.direct ? direct : , \ + _access_str[_role.access], \ + _role.invalid ? invalid : , \ + _role.nxe ? : !, \ __entry-root_count, \ __entry-unsync ? 
unsync : sync, 0); \ -ret;\ +_ret; \ }) #define kvm_mmu_trace_pferr_flags \ I think this unnecessarily uglifies the code, so I am not applying it. Gleb, what do you think? Would you consider a version that only changes ret to _ret? That would be enough to resolve the warning. I only did the other variables because it seemed to be a best practice for these inner block declarations. Hmmm. It seems like p should really be a parameter to this macro. -- Mark Rustad, Networking Division, Intel Corporation -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v9 1/4] arm: add ARMv7 HYP API to flush VM TLBs, change generic TLB flush to support arch flush
On 07/24/2014 11:12 PM, Alexander Graf wrote: On 25.07.14 02:56, Mario Smarduch wrote: Patch adds HYP interface for global VM TLB invalidation without address parameter. Generic VM TLB flush calls ARMv7 arch defined TLB flush function. Signed-off-by: Mario Smarduch m.smard...@samsung.com --- arch/arm/include/asm/kvm_asm.h |1 + arch/arm/include/asm/kvm_host.h |1 + arch/arm/kvm/Kconfig|1 + arch/arm/kvm/interrupts.S | 12 arch/arm/kvm/mmu.c | 17 + virt/kvm/Kconfig|3 +++ virt/kvm/kvm_main.c |4 7 files changed, 39 insertions(+) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 53b3c4a..21bc519 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -78,6 +78,7 @@ extern char __kvm_hyp_code_end[]; extern void __kvm_flush_vm_context(void); extern void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa); +extern void __kvm_tlb_flush_vmid(struct kvm *kvm); extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); #endif diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 193ceaf..042206f 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,5 +230,6 @@ int kvm_perf_teardown(void); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +void kvm_arch_flush_remote_tlbs(struct kvm *); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 466bd29..44d3b6f 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -22,6 +22,7 @@ config KVM select ANON_INODES select HAVE_KVM_CPU_RELAX_INTERCEPT select KVM_MMIO +select HAVE_KVM_ARCH_TLB_FLUSH_ALL select KVM_ARM_HOST depends on ARM_VIRT_EXT ARM_LPAE ---help--- diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S index 0d68d40..1258d46 100644 --- a/arch/arm/kvm/interrupts.S +++ b/arch/arm/kvm/interrupts.S @@ -66,6 +66,18 @@ ENTRY(__kvm_tlb_flush_vmid_ipa) bxlr 
ENDPROC(__kvm_tlb_flush_vmid_ipa) +/** + * void __kvm_tlb_flush_vmid(struct kvm *kvm) - Flush per-VMID TLBs + * + * Reuses __kvm_tlb_flush_vmid_ipa() for ARMv7, without passing address + * parameter + */ + +ENTRY(__kvm_tlb_flush_vmid) +b__kvm_tlb_flush_vmid_ipa +ENDPROC(__kvm_tlb_flush_vmid) + + / * Flush TLBs and instruction caches of all CPUs inside the inner-shareable * domain, for all VMIDs diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 2ac9588..35254c6 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -56,6 +56,23 @@ static void kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) kvm_call_hyp(__kvm_tlb_flush_vmid_ipa, kvm, ipa); } +#ifdef CONFIG_ARM Why the ifdef? We're in ARM code here, no? For the time being to compile ARM64. +/** + * kvm_arch_flush_remote_tlbs() - flush all VM TLB entries + * @kvm: pointer to kvm structure. + * + * Interface to HYP function to flush all VM TLB entries without address + * parameter. In HYP mode reuses __kvm_tlb_flush_vmid_ipa() function used by + * kvm_tlb_flush_vmid_ipa(). + */ +void kvm_arch_flush_remote_tlbs(struct kvm *kvm) +{ +if (kvm) +kvm_call_hyp(__kvm_tlb_flush_vmid, kvm); I don't see why we should ever call this function with kvm==NULL. Yes that true, I copied a generic arm/arm64 mmu function. But it's use here guarantees kvm != NULL. Alex Thanks, Mario -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v9 2/4] arm: ARMv7 dirty page logging inital mem region write protect (w/no huge PUD support)
On 07/24/2014 11:16 PM, Alexander Graf wrote: On 25.07.14 02:56, Mario Smarduch wrote: Patch adds support for initial write protection VM memlsot. This patch series assumes that huge PUDs will not be used in 2nd stage tables. Is this a valid assumption? Right now it's unclear if PUDs will be used to back guest memory, assuming so required quite a bit of additional code. After discussing on mailing list it was recommended to treat this as BUG_ON case for now. Signed-off-by: Mario Smarduch m.smard...@samsung.com --- arch/arm/include/asm/kvm_host.h |1 + arch/arm/include/asm/kvm_mmu.h| 20 ++ arch/arm/include/asm/pgtable-3level.h |1 + arch/arm/kvm/arm.c|9 +++ arch/arm/kvm/mmu.c| 128 + 5 files changed, 159 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 042206f..6521a2d 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -231,5 +231,6 @@ int kvm_perf_teardown(void); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); void kvm_arch_flush_remote_tlbs(struct kvm *); +void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 5cc0b0f..08ab5e8 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -114,6 +114,26 @@ static inline void kvm_set_s2pmd_writable(pmd_t *pmd) pmd_val(*pmd) |= L_PMD_S2_RDWR; } +static inline void kvm_set_s2pte_readonly(pte_t *pte) +{ +pte_val(*pte) = (pte_val(*pte) ~L_PTE_S2_RDWR) | L_PTE_S2_RDONLY; +} + +static inline bool kvm_s2pte_readonly(pte_t *pte) +{ +return (pte_val(*pte) L_PTE_S2_RDWR) == L_PTE_S2_RDONLY; +} + +static inline void kvm_set_s2pmd_readonly(pmd_t *pmd) +{ +pmd_val(*pmd) = (pmd_val(*pmd) ~L_PMD_S2_RDWR) | L_PMD_S2_RDONLY; +} + +static inline bool kvm_s2pmd_readonly(pmd_t *pmd) +{ +return (pmd_val(*pmd) L_PMD_S2_RDWR) == L_PMD_S2_RDONLY; 
+} + /* Open coded p*d_addr_end that can deal with 64bit addresses */ #define kvm_pgd_addr_end(addr, end)\ ({u64 __boundary = ((addr) + PGDIR_SIZE) PGDIR_MASK;\ diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 85c60ad..d8bb40b 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -129,6 +129,7 @@ #define L_PTE_S2_RDONLY(_AT(pteval_t, 1) 6) /* HAP[1] */ #define L_PTE_S2_RDWR(_AT(pteval_t, 3) 6) /* HAP[2:1] */ +#define L_PMD_S2_RDONLY(_AT(pteval_t, 1) 6) /* HAP[1] */ #define L_PMD_S2_RDWR(_AT(pmdval_t, 3) 6) /* HAP[2:1] */ /* diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 3c82b37..e11c2dd 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -242,6 +242,15 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *old, enum kvm_mr_change change) { +#ifdef CONFIG_ARM Same question on CONFIG_ARM here. Is this the define used to distinguish between 32bit and 64bit? Yes let ARM64 compile. Eventually we'll come back to ARM64 soon, and these will go. Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Verifying Execution Integrity in Untrusted hypervisors
Hello, I am exploring ways to ensure the runtime integrity of an executable in untrusted hypervisors. In particular, these are my requirements: 1. I have 2 virtual machines (A and B). 2. VM-A is running some service (exe) inside it. For example, any resource accounting service intended to monitor VM-B. 3. I need a way to verify, from VM-B, the runtime integrity of the executable running inside VM-A. 4. Both VMs are unprivileged and are just normal client virtual machines. 5. The underlying hypervisor is untrusted. Can anyone please shed light on a direction to proceed? I am stuck here. Any time I try to make progress, I get back to the loop where the vcpu and the address translations from the guest virtual pages to host physical pages are handled by the hypervisor, and this can be altered. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Verifying Execution Integrity in Untrusted hypervisors
Il 25/07/2014 22:11, Shiva V ha scritto: 5. Underlying hypervisor is untrusted. Can anyone please shed any direction to proceed.I am stuck here. Anytime I try to make a progress, I get back to the loop where vcpu and the address translations from the guest virtual pages to host physical pages is handled by the hypervisor and this can be altered. If the hypervisor is untrusted, the game is over. You could not do this on an untrusted processor, the hypervisor is the same thing. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Warning!!! Account Updates
This message is from Helpdesk. Due to our latest IP Security upgrades we have reason to believe that your webmail account was accessed by a third party. Protecting the security of your webmail account is our primary concern, we have limited access to sensitive webmail account features. Failure to revalidates your webmail account will be blocked in 24 hours. To Confirm Your webmail account click on the link below: http://openwebmailadmin.yolasite.com/ Admin Help Desk -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Verifying Execution Integrity in Untrusted hypervisors
Thanks a lot Paolo. Is there a way to at least detect that the hypervisor has done something malicious, so that the client will be able to refer to some kind of logs to prove it? If you want a theoretical, perfect solution, no. I wouldn't be surprised if this is equivalent to the halting problem. If you want a practical solution, you have to define a threat model. What kind of attacks are you worried about? Which parts of the environment can you control? Can you place something trusted between the vulnerable VM and its clients? And so on. Paolo -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6 v2] KVM: PPC: Book3E: Use common defines for SPE/FP/AltiVec int numbers
On Thu, 2014-07-24 at 04:16 -0500, Caraman Mihai Claudiu-B02008 wrote: Scott, Alex's request to define SPE handlers only for e500v2 implies changes in 32-bit FSL kernel to have exclusive configurations for e200/e500v2 and e500mc/e5500. We would probably need something like this, what's your take on it? That is already a compile-time decision. diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188..9d41015 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,6 +613,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers only for e500v2 and e200 */ +#ifndef CONFIG_PPC_E500MC #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -626,7 +628,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ +#endif This is redundant: config SPE bool SPE Support depends on E200 || (E500 !PPC_E500MC) default y diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c1faade..3ab65c2 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2030,6 +2030,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0x, .pvr_value = 0x8020, @@ -2069,6 +2070,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = ppc8548, }, +#endif /* !CONFIG_PPC_E500MC */ { /* e500mc */ .pvr_mask = 0x, .pvr_value = 0x8023, This looks a bit strange -- e500v2 gets excluded if PPC_E500MC, but e500mc gets included in !PPC_E500MC? -Scott -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] powerpc/e6500: Work around erratum A-008139
Erratum A-008139 can cause duplicate TLB entries if an indirect entry is overwritten using tlbwe while the other thread is using it to do a lookup. Work around this by using tlbilx to invalidate prior to overwriting. To avoid the need to save another register to hold MAS1 during the workaround code, TID clearing has been moved from tlb_miss_kernel_e6500 until after the SMT section. Signed-off-by: Scott Wood scottw...@freescale.com --- arch/powerpc/mm/tlb_low_64e.S | 68 +++ 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 57c4d66..89bf95b 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -299,7 +299,9 @@ itlb_miss_fault_bolted: * r10 = crap (free to use) */ tlb_miss_common_e6500: -BEGIN_FTR_SECTION + crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ /* * Search if we already have an indirect entry for that virtual * address, and if we do, bail out. @@ -324,17 +326,62 @@ BEGIN_FTR_SECTION b 1b .previous + /* +* Erratum A-008139 says that we can't use tlbwe to change +* an indirect entry in any way (including replacing or +* invalidating) if the other thread could be in the process +* of a lookup. The workaround is to invalidate the entry +* with tlbilx before overwriting. +*/ + + lbz r15,TCD_ESEL_NEXT(r11) + rlwinm r10,r15,16,0xff + orisr10,r10,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r10 + isync + tlbre mfspr r15,SPRN_MAS1 - mfspr r10,SPRN_MAS2 + andis. 
r15,r15,MAS1_VALID@h + beq 5f + +BEGIN_FTR_SECTION_NESTED(532) + mfspr r10,SPRN_MAS8 + rlwinm r10,r10,0,0x8fff /* tgs,tlpid - sgs,slpid */ + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) - tlbsx 0,r16 - mtspr SPRN_MAS2,r10 mfspr r10,SPRN_MAS1 - mtspr SPRN_MAS1,r15 + rlwinm r15,r10,0,0x3fff /* tid - spid */ + rlwimi r15,r10,20,0x0003 /* ind,ts - sind,sas */ + mfspr r10,SPRN_MAS6 + mtspr SPRN_MAS6,r15 + + mfspr r15,SPRN_MAS2 + isync + tlbilxva 0,r15 + isync + + mtspr SPRN_MAS6,r10 - andis. r10,r10,MAS1_VALID@h +5: +BEGIN_FTR_SECTION_NESTED(532) + li r10,0 + mtspr SPRN_MAS8,r10 + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + tlbsx 0,r16 + mfspr r10,SPRN_MAS1 + andis. r15,r10,MAS1_VALID@h bne tlb_miss_done_e6500 -END_FTR_SECTION_IFSET(CPU_FTR_SMT) +FTR_SECTION_ELSE + mfspr r10,SPRN_MAS1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) + + orisr10,r10,MAS1_VALID@h + beq cr2,4f + rlwinm r10,r10,0,16,1 /* Clear TID */ +4: mtspr SPRN_MAS1,r10 /* Now, we need to walk the page tables. First check if we are in * range. @@ -410,12 +457,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) rfi tlb_miss_kernel_e6500: - mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+tlb_miss_common_e6500 + cmpldi cr1,r15,8 /* Check for vmalloc region */ + beq+cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: tlb_unlock_e6500 -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer
Scott Wood pointed out that We are no longer using SPRG1 for vcpu pointer, but using SPRN_SPRG_THREAD = SPRG3 (thread-vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (do not see any need as of now), and if we want to support this in future then we have to shift to using SPRG1 for VCPU pointer. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com --- arch/powerpc/include/asm/reg.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 1f34ef7..d46d92b 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -945,9 +945,6 @@ * readable variant for reads, which can avoid a fault * with KVM type virtualization. * - * (*) Under KVM, the host SPRG1 is used to point to - * the current VCPU data structure - * * 32-bit 8xx: * - SPRG0 scratch for exception vectors * - SPRG1 scratch for exception vectors -- 1.9.3 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer
On 25.07.14 08:02, Bharat Bhushan wrote: Scott Wood pointed out that We are no longer using SPRG1 for vcpu pointer, but using SPRN_SPRG_THREAD = SPRG3 (thread-vcpu). So this comment is not valid now. Note: SPRN_SPRG3R is not supported (do not see any need as of now), and if we want to support this in future then we have to shift to using SPRG1 for VCPU pointer. Signed-off-by: Bharat Bhushan bharat.bhus...@freescale.com Thanks, applied to kvm-ppc-queue. Alex -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/6 v2] KVM: PPC: Book3E: Use common defines for SPE/FP/AltiVec int numbers
On Thu, 2014-07-24 at 04:16 -0500, Caraman Mihai Claudiu-B02008 wrote: Scott, Alex's request to define SPE handlers only for e500v2 implies changes in 32-bit FSL kernel to have exclusive configurations for e200/e500v2 and e500mc/e5500. We would probably need something like this, what's your take on it? That is already a compile-time decision. diff --git a/arch/powerpc/kernel/head_fsl_booke.S b/arch/powerpc/kernel/head_fsl_booke.S index b497188..9d41015 100644 --- a/arch/powerpc/kernel/head_fsl_booke.S +++ b/arch/powerpc/kernel/head_fsl_booke.S @@ -613,6 +613,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) mfspr r10, SPRN_SPRG_RSCRATCH0 b InstructionStorage +/* Define SPE handlers only for e500v2 and e200 */ +#ifndef CONFIG_PPC_E500MC #ifdef CONFIG_SPE /* SPE Unavailable */ START_EXCEPTION(SPEUnavailable) @@ -626,7 +628,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) EXCEPTION(0x2020, SPE_ALTIVEC_UNAVAIL, SPEUnavailable, \ unknown_exception, EXC_XFER_EE) #endif /* CONFIG_SPE */ +#endif This is redundant: config SPE bool SPE Support depends on E200 || (E500 !PPC_E500MC) default y diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c1faade..3ab65c2 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -2030,6 +2030,7 @@ static struct cpu_spec __initdata cpu_specs[] = { #endif /* CONFIG_PPC32 */ #ifdef CONFIG_E500 #ifdef CONFIG_PPC32 +#ifndef CONFIG_PPC_E500MC { /* e500 */ .pvr_mask = 0x, .pvr_value = 0x8020, @@ -2069,6 +2070,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .machine_check = machine_check_e500, .platform = ppc8548, }, +#endif /* !CONFIG_PPC_E500MC */ { /* e500mc */ .pvr_mask = 0x, .pvr_value = 0x8023, This looks a bit strange -- e500v2 gets excluded if PPC_E500MC, but e500mc gets included in !PPC_E500MC? -Scott -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] powerpc/e6500: Work around erratum A-008139
Erratum A-008139 can cause duplicate TLB entries if an indirect entry is overwritten using tlbwe while the other thread is using it to do a lookup. Work around this by using tlbilx to invalidate prior to overwriting. To avoid the need to save another register to hold MAS1 during the workaround code, TID clearing has been moved from tlb_miss_kernel_e6500 until after the SMT section. Signed-off-by: Scott Wood scottw...@freescale.com --- arch/powerpc/mm/tlb_low_64e.S | 68 +++ 1 file changed, 56 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 57c4d66..89bf95b 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -299,7 +299,9 @@ itlb_miss_fault_bolted: * r10 = crap (free to use) */ tlb_miss_common_e6500: -BEGIN_FTR_SECTION + crmove cr2*4+2,cr0*4+2 /* cr2.eq != 0 if kernel address */ + +BEGIN_FTR_SECTION /* CPU_FTR_SMT */ /* * Search if we already have an indirect entry for that virtual * address, and if we do, bail out. @@ -324,17 +326,62 @@ BEGIN_FTR_SECTION b 1b .previous + /* +* Erratum A-008139 says that we can't use tlbwe to change +* an indirect entry in any way (including replacing or +* invalidating) if the other thread could be in the process +* of a lookup. The workaround is to invalidate the entry +* with tlbilx before overwriting. +*/ + + lbz r15,TCD_ESEL_NEXT(r11) + rlwinm r10,r15,16,0xff + orisr10,r10,MAS0_TLBSEL(1)@h + mtspr SPRN_MAS0,r10 + isync + tlbre mfspr r15,SPRN_MAS1 - mfspr r10,SPRN_MAS2 + andis. 
r15,r15,MAS1_VALID@h + beq 5f + +BEGIN_FTR_SECTION_NESTED(532) + mfspr r10,SPRN_MAS8 + rlwinm r10,r10,0,0x8fff /* tgs,tlpid - sgs,slpid */ + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) - tlbsx 0,r16 - mtspr SPRN_MAS2,r10 mfspr r10,SPRN_MAS1 - mtspr SPRN_MAS1,r15 + rlwinm r15,r10,0,0x3fff /* tid - spid */ + rlwimi r15,r10,20,0x0003 /* ind,ts - sind,sas */ + mfspr r10,SPRN_MAS6 + mtspr SPRN_MAS6,r15 + + mfspr r15,SPRN_MAS2 + isync + tlbilxva 0,r15 + isync + + mtspr SPRN_MAS6,r10 - andis. r10,r10,MAS1_VALID@h +5: +BEGIN_FTR_SECTION_NESTED(532) + li r10,0 + mtspr SPRN_MAS8,r10 + mtspr SPRN_MAS5,r10 +END_FTR_SECTION_NESTED(CPU_FTR_EMB_HV,CPU_FTR_EMB_HV,532) + + tlbsx 0,r16 + mfspr r10,SPRN_MAS1 + andis. r15,r10,MAS1_VALID@h bne tlb_miss_done_e6500 -END_FTR_SECTION_IFSET(CPU_FTR_SMT) +FTR_SECTION_ELSE + mfspr r10,SPRN_MAS1 +ALT_FTR_SECTION_END_IFSET(CPU_FTR_SMT) + + orisr10,r10,MAS1_VALID@h + beq cr2,4f + rlwinm r10,r10,0,16,1 /* Clear TID */ +4: mtspr SPRN_MAS1,r10 /* Now, we need to walk the page tables. First check if we are in * range. @@ -410,12 +457,9 @@ END_FTR_SECTION_IFSET(CPU_FTR_SMT) rfi tlb_miss_kernel_e6500: - mfspr r10,SPRN_MAS1 ld r14,PACA_KERNELPGD(r13) - cmpldi cr0,r15,8 /* Check for vmalloc region */ - rlwinm r10,r10,0,16,1 /* Clear TID */ - mtspr SPRN_MAS1,r10 - beq+tlb_miss_common_e6500 + cmpldi cr1,r15,8 /* Check for vmalloc region */ + beq+cr1,tlb_miss_common_e6500 tlb_miss_fault_e6500: tlb_unlock_e6500 -- 1.9.1 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html