[PATCH 2/2] kvm/x86: use __test_bit
Let compiler do slightly better optimizations using the non-volatile __test_bit in all cases where the values are set using the non-volatile __set_bit and __clear_bit. I left test_bit in place where the mask is set using the atomic set_bit/clear_bit, for symmetry. This shaves about 100 bytes off the kernel size: before: 134868 29978372 146237 23b3d arch/x86/kvm/kvm-intel.ko 34312947640 441 391210 5f82a arch/x86/kvm/kvm.ko after: 134836 29978372 146205 23b1d arch/x86/kvm/kvm-intel.ko 343017 47640 441 391098 5f7ba arch/x86/kvm/kvm.ko Signed-off-by: Michael S. Tsirkin m...@redhat.com --- arch/x86/kvm/ioapic.h | 2 +- arch/x86/kvm/kvm_cache_regs.h | 6 +++--- arch/x86/kvm/ioapic.c | 2 +- arch/x86/kvm/pmu_intel.c | 2 +- arch/x86/kvm/vmx.c| 18 +- arch/x86/kvm/x86.c| 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h index ca0b0b4..3b58d41 100644 --- a/arch/x86/kvm/ioapic.h +++ b/arch/x86/kvm/ioapic.h @@ -102,7 +102,7 @@ static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector) { struct kvm_ioapic *ioapic = kvm-arch.vioapic; smp_rmb(); - return test_bit(vector, ioapic-handled_vectors); + return __test_bit(vector, ioapic-handled_vectors); } void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index e1e89ee..21ef6d6 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -9,7 +9,7 @@ static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, enum kvm_reg reg) { - if (!test_bit(reg, (unsigned long *)vcpu-arch.regs_avail)) + if (!__test_bit(reg, (unsigned long *)vcpu-arch.regs_avail)) kvm_x86_ops-cache_reg(vcpu, reg); return vcpu-arch.regs[reg]; @@ -38,7 +38,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) { might_sleep(); /* on svm */ - if (!test_bit(VCPU_EXREG_PDPTR, + if (!__test_bit(VCPU_EXREG_PDPTR, (unsigned long *)vcpu-arch.regs_avail)) 
kvm_x86_ops-cache_reg(vcpu, VCPU_EXREG_PDPTR); @@ -68,7 +68,7 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) { - if (!test_bit(VCPU_EXREG_CR3, (ulong *)vcpu-arch.regs_avail)) + if (!__test_bit(VCPU_EXREG_CR3, (ulong *)vcpu-arch.regs_avail)) kvm_x86_ops-decache_cr3(vcpu); return vcpu-arch.cr3; } diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 856f791..bf2afa5 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -117,7 +117,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct kvm_vcpu *vcpu) return; new_val = kvm_apic_pending_eoi(vcpu, e-fields.vector); - old_val = test_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); + old_val = __test_bit(vcpu-vcpu_id, ioapic-rtc_status.dest_map); if (new_val == old_val) return; diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c index ab38af4..fb20a0f 100644 --- a/arch/x86/kvm/pmu_intel.c +++ b/arch/x86/kvm/pmu_intel.c @@ -98,7 +98,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc) { struct kvm_pmu *pmu = pmc_to_pmu(pmc); - return test_bit(pmc-idx, (unsigned long *)pmu-global_ctrl); + return __test_bit(pmc-idx, (unsigned long *)pmu-global_ctrl); } static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c117703..ed44026 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2025,7 +2025,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; - if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu-arch.regs_avail)) { + if (!__test_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu-arch.regs_avail)) { __set_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu-arch.regs_avail); rflags = vmcs_readl(GUEST_RFLAGS); if (to_vmx(vcpu)-rmode.vm86_active) { @@ -3478,7 +3478,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) { struct kvm_mmu *mmu = vcpu-arch.walk_mmu; - if (!test_bit(VCPU_EXREG_PDPTR, + if 
(!__test_bit(VCPU_EXREG_PDPTR, (unsigned long *)vcpu-arch.regs_dirty)) return; @@ -3513,7 +3513,7 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0, unsigned long cr0, struct kvm_vcpu *vcpu) { - if (!test_bit(VCPU_EXREG_CR3, (ulong *)vcpu-arch.regs_avail)) + if (!__test_bit(VCPU_EXREG_CR3, (ulong
[PATCH RFC 0/3] pci-testdev: add support for kvm ioeventfd pf
This adds a test for triggering ioeventfd on pagefaults. This was used to verify that mmio ioeventfd on pagefault is as fast as portio. Michael S. Tsirkin (3): pci-testdev: separate page for each mmio test pci-testdev: add subregion pci-testdev: add RO pages for ioeventfd hw/misc/pci-testdev.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 3/3] pci-testdev: add RO pages for ioeventfd
This seems hackish - would it be better to create this region automatically within kvm? Suggestions are welcome. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/misc/pci-testdev.c | 13 + 1 file changed, 13 insertions(+) diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index 94141a3..55efc32 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -21,6 +21,7 @@ #include hw/pci/pci.h #include qemu/event_notifier.h #include qemu/osdep.h +#include sys/mman.h typedef struct PCITestDevHdr { uint8_t test; @@ -82,11 +83,13 @@ typedef struct PCITestDevState { PCIDevice parent_obj; /* public */ +MemoryRegion zeromr; MemoryRegion mmio; MemoryRegion mbar; MemoryRegion portio; IOTest *tests; int current; +void *zero; } PCITestDevState; #define TYPE_PCI_TEST_DEV pci-testdev @@ -242,6 +245,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) uint8_t *pci_conf; char *name; int r, i; +d-zero = mmap(NULL, IOTEST_MEMSIZE * 2, PROT_READ, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + +memory_region_init_ram_ptr(d-zeromr, OBJECT(d), pci-testdev-zero, 0x1000, d-zero); +memory_region_set_readonly(d-zeromr, true); pci_conf = pci_dev-config; @@ -286,6 +294,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) test-hasnotifier = false; continue; } + +if (test-hasnotifier !test-size) { +memory_region_add_subregion_overlap(d-mbar, le32_to_cpu(test-hdr-offset), +d-zeromr, 2 /* prio */); +} r = event_notifier_init(test-notifier, 0); assert(r = 0); test-hasnotifier = true; -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 2/3] pci-testdev: add subregion
Make mmio a subregion of the BAR. This will allow mapping rom within the same BAR down the road. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/misc/pci-testdev.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index 6edc1cd..94141a3 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -83,6 +83,7 @@ typedef struct PCITestDevState { /* public */ MemoryRegion mmio; +MemoryRegion mbar; MemoryRegion portio; IOTest *tests; int current; @@ -248,9 +249,13 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) memory_region_init_io(d-mmio, OBJECT(d), pci_testdev_mmio_ops, d, pci-testdev-mmio, IOTEST_MEMSIZE * 2); +memory_region_init(d-mbar, OBJECT(d), + pci-testdev-mmio, IOTEST_MEMSIZE * 2); memory_region_init_io(d-portio, OBJECT(d), pci_testdev_pio_ops, d, pci-testdev-portio, IOTEST_IOSIZE * 2); -pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, d-mmio); + +memory_region_add_subregion_overlap(d-mbar, 0, d-mmio, 1 /* prio */); +pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, d-mbar); pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, d-portio); d-current = -1; -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 1/3] pci-testdev: separate page for each mmio test
note: this makes BAR 4K, which requires kvm unit test patch to support such BAR. Do we need to worry about old kvm unit test binaries? I'm guessing not ... Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/misc/pci-testdev.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index b6e11d6..6edc1cd 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -48,7 +48,7 @@ typedef struct IOTest { #define IOTEST_NODATA 0xAB #define IOTEST_IOSIZE 128 -#define IOTEST_MEMSIZE 2048 +#define IOTEST_MEMSIZE 0x1 static const char *iotest_test[] = { no-eventfd, @@ -262,7 +262,7 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) test-hdr = g_malloc0(test-bufsize); memcpy(test-hdr-name, name, strlen(name) + 1); g_free(name); -test-hdr-offset = cpu_to_le32(IOTEST_SIZE(i) + i * IOTEST_ACCESS_WIDTH); +test-hdr-offset = cpu_to_le32(IOTEST_SIZE(i) + i * (IOTEST_IS_MEM(i) ? 0x1000 : IOTEST_ACCESS_WIDTH)); test-size = strcmp(IOTEST_TEST(i), nodata-eventfd) ? IOTEST_ACCESS_WIDTH : 0; test-match_data = strcmp(IOTEST_TEST(i), wildcard-eventfd) @@ -273,6 +273,7 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) test-mr = IOTEST_REGION(d, i); if (!test-size !IOTEST_IS_MEM(i)) { +test-hdr-width = 0; test-hasnotifier = false; continue; } -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 2/3] svm: allow ioeventfd for NPT page faults
MMIO is slightly slower than port IO because it uses the page-tables, so the CPU must do a pagewalk on each access. This overhead is normally masked by using the TLB cache: but not so for KVM MMIO, where PTEs are marked as reserved and so are never cached. As ioeventfd memory is never read, make it possible to use RO pages on the host for ioeventfds, instead. The result is that TLBs are cached, which finally makes MMIO as fast as port IO. Warning: untested. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- arch/x86/kvm/svm.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 8e0c084..6422fac 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1812,6 +1812,11 @@ static int pf_interception(struct vcpu_svm *svm) switch (svm-apf_reason) { default: error_code = svm-vmcb-control.exit_info_1; + if (!kvm_io_bus_write(svm-vcpu, KVM_FAST_MMIO_BUS, + fault_address, 0, NULL)) { + skip_emulated_instruction(svm-vcpu); + return 1; + } trace_kvm_page_fault(fault_address, error_code); if (!npt_enabled kvm_event_needs_reinjection(svm-vcpu)) -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 3/3] kvm: add KVM_CAP_IOEVENTFD_PF capability
Signed-off-by: Michael S. Tsirkin m...@redhat.com --- include/uapi/linux/kvm.h | 1 + arch/x86/kvm/x86.c| 1 + Documentation/virtual/kvm/api.txt | 7 +++ 3 files changed, 9 insertions(+) diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 716ad4a..4509aa3 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -817,6 +817,7 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_DISABLE_QUIRKS 116 #define KVM_CAP_X86_SMM 117 #define KVM_CAP_MULTI_ADDRESS_SPACE 118 +#define KVM_CAP_IOEVENTFD_PF 119 #ifdef KVM_CAP_IRQ_ROUTING diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c8015fa..f989453 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2629,6 +2629,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IRQ_INJECT_STATUS: case KVM_CAP_IOEVENTFD: case KVM_CAP_IOEVENTFD_NO_LENGTH: + case KVM_CAP_IOEVENTFD_PF: case KVM_CAP_PIT2: case KVM_CAP_PIT_STATE2: case KVM_CAP_SET_IDENTITY_MAP_ADDR: diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index a7926a9..85a76ad 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1618,6 +1618,13 @@ The following flags are defined: If datamatch flag is set, the event will be signaled only if the written value to the registered address is equal to datamatch in struct kvm_ioeventfd. +If KVM_CAP_IOEVENTFD_NO_LENGTH is present, and when DATAMATCH flag +is clear, len can be set to 0 to match access of any length. + +If KVM_CAP_IOEVENTFD_PF is present, and when DATAMATCH flag +is clear and len is set to 0, the specified address can overlap +a read-only memory region (as opposed to an MMIO region). + For virtio-ccw devices, addr contains the subchannel id and datamatch the virtqueue index. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 1/3] vmx: allow ioeventfd for EPT violations
Even when we skip data decoding, MMIO is slightly slower than port IO because it uses the page-tables, so the CPU must do a pagewalk on each access. This overhead is normally masked by using the TLB cache: but not so for KVM MMIO, where PTEs are marked as reserved and so are never cached. As ioeventfd memory is never read, make it possible to use RO pages on the host for ioeventfds, instead. The result is that TLBs are cached, which finally makes MMIO as fast as port IO. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- arch/x86/kvm/vmx.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9d1bfd3..ed44026 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5745,6 +5745,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + skip_emulated_instruction(vcpu); + return 1; + } + trace_kvm_page_fault(gpa, exit_qualification); /* It is a write fault? */ -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC 3/3] pci-testdev: add RO pages for ioeventfd
On 2015/8/30 17:20, Michael S. Tsirkin wrote: This seems hackish - would it be better to create this region automatically within kvm? Suggestions are welcome. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- hw/misc/pci-testdev.c | 13 + 1 file changed, 13 insertions(+) diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c index 94141a3..55efc32 100644 --- a/hw/misc/pci-testdev.c +++ b/hw/misc/pci-testdev.c @@ -21,6 +21,7 @@ #include hw/pci/pci.h #include qemu/event_notifier.h #include qemu/osdep.h +#include sys/mman.h typedef struct PCITestDevHdr { uint8_t test; @@ -82,11 +83,13 @@ typedef struct PCITestDevState { PCIDevice parent_obj; /* public */ +MemoryRegion zeromr; MemoryRegion mmio; MemoryRegion mbar; MemoryRegion portio; IOTest *tests; int current; +void *zero; } PCITestDevState; #define TYPE_PCI_TEST_DEV pci-testdev @@ -242,6 +245,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) uint8_t *pci_conf; char *name; int r, i; +d-zero = mmap(NULL, IOTEST_MEMSIZE * 2, PROT_READ, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + Do we need think about hotplugging pci-testdev ? If yes, then we should release some resources when hot-unplug a pci-testdev device: munmap(d-zero, ...) memory_region_del_subregion(d-mbar, d-mmio) ... Regards, -Gonglei +memory_region_init_ram_ptr(d-zeromr, OBJECT(d), pci-testdev-zero, 0x1000, d-zero); +memory_region_set_readonly(d-zeromr, true); pci_conf = pci_dev-config; @@ -286,6 +294,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error **errp) test-hasnotifier = false; continue; } + +if (test-hasnotifier !test-size) { +memory_region_add_subregion_overlap(d-mbar, le32_to_cpu(test-hdr-offset), +d-zeromr, 2 /* prio */); +} r = event_notifier_init(test-notifier, 0); assert(r = 0); test-hasnotifier = true; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH RFC 0/3] kvm: add ioeventfd pf capability
One of the reasons MMIO is slower than port IO is because it requires a page table lookup. For normal memory accesses, this is solved by using the TLB cache - but MMIO entries are either not present or reserved and so are never cached. To fix, allow installing an ioeventfd on top of a read only memory region, which allows the CPU to cache the translations. Warning: svm patch is untested. Michael S. Tsirkin (3): vmx: allow ioeventfd for EPT violations svm: allow ioeventfd for NPT page faults kvm: add KVM_CAP_IOEVENTFD_PF capability include/uapi/linux/kvm.h | 1 + arch/x86/kvm/svm.c| 5 + arch/x86/kvm/vmx.c| 5 + arch/x86/kvm/x86.c| 1 + Documentation/virtual/kvm/api.txt | 7 +++ 5 files changed, 19 insertions(+) -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: arm64: Decode basic HYP fault information
On Tue, Aug 11, 2015 at 10:34:07AM +0300, Pavel Fedin wrote: Print exception vector name, exception class and PC translated to EL1 virtual address. Significantly aids debugging HYP crashes without special means like JTAG. my overall concern with this patch is that it adds complexity to an already really bad situation, and potentially increases the likelihood of not seeing any debug info at all. do you encounter this kind of panic a lot? I haven't experienced a great need for more hyp debugging help lately... Signed-off-by: Pavel Fedin p.fe...@samsung.com --- arch/arm64/kvm/handle_exit.c | 30 + arch/arm64/kvm/hyp.S | 46 +--- 2 files changed, 48 insertions(+), 28 deletions(-) diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index 29b184a..4d70d64 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -136,3 +136,33 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, return 0; } } + +static const char *const hyp_faults[] = { + EL2t Synchronous, + EL2t IRQ, + EL2t FIQ, + EL2t Error, + EL2h Synchronous, + EL2h IRQ, + EL2h FIQ, + EL2h Error, + EL1 Synchronous, + EL1 IRQ, + EL1 FIQ, + EL1 Error +}; + +void kvm_hyp_panic(unsigned long vector, unsigned int spsr, unsigned long pc, +unsigned int esr, unsigned long far, unsigned long hpfar, +unsigned long par, struct kvm_vcpu *vcpu) +{ + pr_emerg(Unhandled HYP exception %s on VCPU %p\n, + hyp_faults[vector], vcpu); + pr_emerg(PC : %016lx SPSR : %08x ESR: %08x\n, pc, spsr, esr); + pr_emerg(FAR: %016lx HPFAR: %016lx PAR: %016lx\n, far, hpfar, par); + + pr_emerg(Exception class: %02x Translated PC: %016lx\n, + esr ESR_ELx_EC_SHIFT, pc - HYP_PAGE_OFFSET + PAGE_OFFSET); + + panic(HYP panic); +} diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S index c81eaaf..62785cd 100644 --- a/arch/arm64/kvm/hyp.S +++ b/arch/arm64/kvm/hyp.S @@ -1060,13 +1060,11 @@ __kvm_hyp_panic: ldr x2, [x0, #VCPU_HOST_CONTEXT] kern_hyp_va x2 + mov x0, lr bl __restore_sysregs + mov lr, x0 -1: 
adr x0, __hyp_panic_str - adr x1, 2f - ldp x2, x3, [x1] - sub x0, x0, x2 - add x0, x0, x3 +1: mov x0, lr mrs x1, spsr_el2 mrs x2, elr_el2 mrs x3, esr_el2 @@ -1078,20 +1076,11 @@ __kvm_hyp_panic: mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) msr spsr_el2, lr - ldr lr, =panic + ldr lr, =kvm_hyp_panic msr elr_el2, lr eret - - .align 3 -2: .quad HYP_PAGE_OFFSET - .quad PAGE_OFFSET why can you get rid of this? ENDPROC(__kvm_hyp_panic) -__hyp_panic_str: - .ascii HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p PAR:%p\nVCPU:%p\n\0 - - .align 2 - /* * u64 kvm_call_hyp(void *hypfn, ...); * @@ -1115,26 +1104,27 @@ ENTRY(kvm_call_hyp) ret ENDPROC(kvm_call_hyp) -.macro invalid_vectorlabel, target +.macro invalid_vectorlabel, N, target .align 2 \label: + mov lr, #\N b \target ENDPROC(\label) .endm /* None of these should ever happen */ - invalid_vector el2t_sync_invalid, __kvm_hyp_panic - invalid_vector el2t_irq_invalid, __kvm_hyp_panic - invalid_vector el2t_fiq_invalid, __kvm_hyp_panic - invalid_vector el2t_error_invalid, __kvm_hyp_panic - invalid_vector el2h_sync_invalid, __kvm_hyp_panic - invalid_vector el2h_irq_invalid, __kvm_hyp_panic - invalid_vector el2h_fiq_invalid, __kvm_hyp_panic - invalid_vector el2h_error_invalid, __kvm_hyp_panic - invalid_vector el1_sync_invalid, __kvm_hyp_panic - invalid_vector el1_irq_invalid, __kvm_hyp_panic - invalid_vector el1_fiq_invalid, __kvm_hyp_panic - invalid_vector el1_error_invalid, __kvm_hyp_panic + invalid_vector el2t_sync_invalid, 0, __kvm_hyp_panic + invalid_vector el2t_irq_invalid, 1, __kvm_hyp_panic + invalid_vector el2t_fiq_invalid, 2, __kvm_hyp_panic + invalid_vector el2t_error_invalid, 3, __kvm_hyp_panic + invalid_vector el2h_sync_invalid, 4, __kvm_hyp_panic + invalid_vector el2h_irq_invalid, 5, __kvm_hyp_panic + invalid_vector el2h_fiq_invalid, 6, __kvm_hyp_panic + invalid_vector el2h_error_invalid, 7, __kvm_hyp_panic + invalid_vector el1_sync_invalid, 8, __kvm_hyp_panic + invalid_vector 
el1_irq_invalid, 9, __kvm_hyp_panic + invalid_vector el1_fiq_invalid, 10, __kvm_hyp_panic + invalid_vector el1_error_invalid, 11, __kvm_hyp_panic el1_sync:// Guest trapped into EL2 pushx0,
Re: [PATCH 3/3] KVM: arm64: Implement accessors for vGIC CPU interface registers
On 30 August 2015 at 17:50, Christoffer Dall christoffer.d...@linaro.org wrote: I had imagined we would encode the GICv3 register accesses through the device API and not through the system register API, since I'm not crazy about polluting the general system register handling logic with GIC registers solely for the purposes of migration. There's an interesting design question lurking under this about the extent to which you expose the h/w design split between the CPU interface and the GIC proper as part of the KVM APIs. I'm inclined to agree that it's better to for our purposes treat both bits as just part of an irqchip device, but I haven't given it a great deal of thought. (Similarly in the QEMU emulated-GICv3 case you could also split the CPU i/f more formally, or not. The kernel's choice would have implications for which way QEMU ends up going, I think.) thanks -- PMM -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/9] Rework architected timer and fix UEFI reset
The architected timer integration with the vgic had some shortcomings in that certain guests (one being UEFI) weren't fully supported. In fixing this I also found that we are scheduling the hrtimer for the virtual timer way too often, with a potential performance overhead. This series tries to address these problems in proviging level-triggered semantics for the arch timer and vgic intergration and seeks to clarify the behavior when setting/clearing the active state on the physical distributor. Series based on kvmarm/next and also available at: https://git.linaro.org/people/christoffer.dall/linux-kvm-arm.git timer-rework Christoffer Dall (9): KVM: Add kvm_arch_vcpu_{un}blocking callbacks arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block arm/arm64: KVM: vgic: Factor out level irq processing on guest exit arm/arm64: Implement GICD_ICFGR as RO for PPIs arm/arm64: KVM: Use appropriate define in VGIC reset code arm/arm64: KVM: Add mapped interrupts documentation arm/arm64: KVM: vgic: Move active state handling to flush_hwstate arm/arm64: KVM: Rework the arch timer to use level-triggered semantics arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt | 59 ++ arch/arm/kvm/arm.c | 21 ++- arch/mips/include/asm/kvm_host.h | 2 + arch/powerpc/include/asm/kvm_host.h| 2 + arch/s390/include/asm/kvm_host.h | 2 + arch/x86/include/asm/kvm_host.h| 3 + include/kvm/arm_arch_timer.h | 4 +- include/kvm/arm_vgic.h | 3 - include/linux/kvm_host.h | 2 + virt/kvm/arm/arch_timer.c | 160 +++- virt/kvm/arm/vgic.c| 201 +++-- virt/kvm/kvm_main.c| 3 + 12 files changed, 308 insertions(+), 154 deletions(-) create mode 100644 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] KVM: arm64: Implement accessors for vGIC CPU interface registers
On Fri, Aug 28, 2015 at 03:56:12PM +0300, Pavel Fedin wrote: This commit adds accessors for all registers, being part of saved vGIC context in the form of ICH_VMCR_EL2. This is necessary for enabling vGICv3 live migration. Signed-off-by: Pavel Fedin p.fe...@samsung.com --- arch/arm64/kvm/sys_regs.c | 176 + include/linux/irqchip/arm-gic-v3.h | 18 +++- 2 files changed, 192 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8cc4a5e..7a4f982 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -23,6 +23,7 @@ #include linux/kvm_host.h #include linux/mm.h #include linux/uaccess.h +#include linux/irqchip/arm-gic-v3.h #include asm/cacheflush.h #include asm/cputype.h @@ -136,6 +137,162 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, return true; } +static bool access_gic_ctlr(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val; + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3; + + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + if (p-is_write) { + val = *vcpu_reg(vcpu, p-Rt); + + vgicv3-vgic_vmcr = ~(ICH_VMCR_CBPR|ICH_VMCR_EOIM); + vgicv3-vgic_vmcr |= (val (ICH_VMCR_CBPR_SHIFT - + ICC_CTLR_EL1_CBPR_SHIFT)) + ICH_VMCR_CBPR; + vgicv3-vgic_vmcr |= (val (ICH_VMCR_EOIM_SHIFT - + ICC_CTLR_EL1_EOImode_SHIFT)) + ICH_VMCR_EOIM; + } else { + asm volatile(mrs_s %0, __stringify(ICC_IAR1_EL1) + : =r (val)); + val = (ICC_CTLR_EL1_A3V | ICC_CTLR_EL1_SEIS | + ICC_CTLR_EL1_IDbits_MASK | ICC_CTLR_EL1_PRIbits_MASK); + val |= (vgicv3-vgic_vmcr ICH_VMCR_CBPR) + (ICH_VMCR_CBPR_SHIFT - ICC_CTLR_EL1_CBPR_SHIFT); + val |= (vgicv3-vgic_vmcr ICH_VMCR_EOIM) + (ICH_VMCR_EOIM_SHIFT - ICC_CTLR_EL1_EOImode_SHIFT); + + *vcpu_reg(vcpu, p-Rt) = val; + } + + return true; +} + +static bool access_gic_pmr(struct kvm_vcpu *vcpu, +const struct sys_reg_params *p, +const struct sys_reg_desc *r) +{ + u64 val; + struct vgic_v3_cpu_if *vgicv3 = 
vcpu-arch.vgic_cpu.vgic_v3; + + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + if (p-is_write) { + val = *vcpu_reg(vcpu, p-Rt); + vgicv3-vgic_vmcr = ~ICH_VMCR_PMR_MASK; + vgicv3-vgic_vmcr |= (val ICH_VMCR_PMR_SHIFT) + ICH_VMCR_PMR_MASK; + } else { + val = (vgicv3-vgic_vmcr ICH_VMCR_PMR_MASK) + ICH_VMCR_PMR_SHIFT; + *vcpu_reg(vcpu, p-Rt) = val; + } + + return true; +} + +static bool access_gic_bpr0(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val; + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3; + + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + if (p-is_write) { + val = *vcpu_reg(vcpu, p-Rt); + vgicv3-vgic_vmcr = ~ICH_VMCR_BPR0_MASK; + vgicv3-vgic_vmcr |= (val ICH_VMCR_BPR0_SHIFT) + ICH_VMCR_BPR0_MASK; + } else { + val = (vgicv3-vgic_vmcr ICH_VMCR_BPR0_MASK) + ICH_VMCR_BPR0_SHIFT; + *vcpu_reg(vcpu, p-Rt) = val; + } + + return true; +} + +static bool access_gic_bpr1(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct sys_reg_desc *r) +{ + u64 val; + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3; + + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3) + return false; + + if (p-is_write) { + val = *vcpu_reg(vcpu, p-Rt); + vgicv3-vgic_vmcr = ~ICH_VMCR_BPR1_MASK; + vgicv3-vgic_vmcr |= (val ICH_VMCR_BPR1_SHIFT) + ICH_VMCR_BPR1_MASK; + } else { + val = (vgicv3-vgic_vmcr ICH_VMCR_BPR1_MASK) + ICH_VMCR_BPR1_SHIFT; + *vcpu_reg(vcpu, p-Rt) = val; + } + + return true; +} + +static bool access_gic_grpen0(struct kvm_vcpu *vcpu, + const struct sys_reg_params *p, + const struct
Re: [PATCH 1/3] KVM: arm64: Implement vGICv3 distributor and redistributor access from userspace
On Fri, Aug 28, 2015 at 03:56:10PM +0300, Pavel Fedin wrote: The access is done similar to GICv2, using KVM_DEV_ARM_VGIC_GRP_DIST_REGS and KVM_DEV_ARM_VGIC_GRP_REDIST_REGS with KVM_SET_DEVICE_ATTR and KVM_GET_DEVICE_ATTR ioctls. Registers are always assumed to be of their native size, 4 or 8 bytes. Signed-off-by: Pavel Fedin p.fe...@samsung.com --- arch/arm64/include/uapi/asm/kvm.h | 1 + virt/kvm/arm/vgic-v3-emul.c | 186 +++--- 2 files changed, 172 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 0cd7b59..2936651 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -203,6 +203,7 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3 #define KVM_DEV_ARM_VGIC_GRP_CTRL4 #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c index e661e7f..b3847e1 100644 --- a/virt/kvm/arm/vgic-v3-emul.c +++ b/virt/kvm/arm/vgic-v3-emul.c @@ -39,6 +39,7 @@ #include linux/kvm.h #include linux/kvm_host.h #include linux/interrupt.h +#include linux/uaccess.h #include linux/irqchip/arm-gic-v3.h #include kvm/arm_vgic.h @@ -990,6 +991,107 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) vgic_kick_vcpus(vcpu-kvm); } +static int vgic_v3_attr_regs_access(struct kvm_device *dev, + struct kvm_device_attr *attr, + void *reg, u32 len, bool is_write) using a void pointer for the register with variable length here is likely to cause endianness headaches. Can we use a typed pointer here? 
+{ + const struct vgic_io_range *r = NULL, *ranges; + phys_addr_t offset; + int ret, cpuid, c; + struct kvm_vcpu *vcpu, *tmp_vcpu; + struct vgic_dist *vgic; + struct kvm_exit_mmio mmio; + u64 data; + + offset = attr-attr KVM_DEV_ARM_VGIC_OFFSET_MASK; + cpuid = (attr-attr KVM_DEV_ARM_VGIC_CPUID_MASK) + KVM_DEV_ARM_VGIC_CPUID_SHIFT; + + mutex_lock(dev-kvm-lock); + + ret = vgic_init(dev-kvm); + if (ret) + goto out; + + if (cpuid = atomic_read(dev-kvm-online_vcpus)) { + ret = -EINVAL; + goto out; + } + + vcpu = kvm_get_vcpu(dev-kvm, cpuid); + vgic = dev-kvm-arch.vgic; + + mmio.len = len; + mmio.is_write = is_write; + mmio.data = data; + if (is_write) { + if (len == 8) + data = cpu_to_le64(*((u64 *)reg)); + else + mmio_data_write(mmio, ~0, *((u32 *)reg)); + } + switch (attr-group) { + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS: + mmio.phys_addr = vgic-vgic_dist_base + offset; + ranges = vgic_v3_dist_ranges; + break; + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS: + mmio.phys_addr = vgic-vgic_redist_base + offset; + ranges = vgic_redist_ranges; + break; + default: + BUG(); + } + r = vgic_find_range(ranges, 4, offset); + + if (unlikely(!r || !r-handle_mmio)) { + ret = -ENXIO; + goto out; + } + + + spin_lock(vgic-lock); + + /* + * Ensure that no other VCPU is running by checking the vcpu-cpu + * field. If no other VPCUs are running we can safely access the VGIC + * state, because even if another VPU is run after this point, that + * VCPU will not touch the vgic state, because it will block on + * getting the vgic-lock in kvm_vgic_sync_hwstate(). + */ + kvm_for_each_vcpu(c, tmp_vcpu, dev-kvm) { + if (unlikely(tmp_vcpu-cpu != -1)) { + ret = -EBUSY; + goto out_vgic_unlock; + } + } + + /* + * Move all pending IRQs from the LRs on all VCPUs so the pending + * state can be properly represented in the register state accessible + * through this API. 
+ */ + kvm_for_each_vcpu(c, tmp_vcpu, dev-kvm) + vgic_unqueue_irqs(tmp_vcpu); + + offset -= r-base; + r-handle_mmio(vcpu, mmio, offset); + + if (!is_write) { + if (len == 8) + *(u64 *)reg = le64_to_cpu(data); + else + *(u32 *)reg = mmio_data_read(mmio, ~0); + } + + ret = 0; +out_vgic_unlock: + spin_unlock(vgic-lock); +out: + mutex_unlock(dev-kvm-lock); + return ret; I feel like there's a lot of reused code with the v2 vgic here. Can you look at reusing some of the logic? +} + static int vgic_v3_create(struct kvm_device *dev, u32 type) { return
Re: [PATCH 0/3] KVM: arm64: Implement API for vGICv3 live migration
On Fri, Aug 28, 2015 at 03:56:09PM +0300, Pavel Fedin wrote: This patchset adds necessary userspace API in order to support vGICv3 live migration. This includes accessing GIC distributor and redistributor memory regions using device attribute ioctls, and system registers of CPU interface using register get/set ioctls. This obviously lacks a clear description of the API in Documentation/virtual/kvm/devices/arm-vgic.txt Pavel Fedin (3): KVM: arm64: Implement vGICv3 distributor and redistributor access from userspace KVM: arm64: Allow to use accessors in KVM_SET_ONE_REG and KVM_GET_ONE_REG KVM: arm64: Implement accessors for vGIC CPU interface registers arch/arm64/include/uapi/asm/kvm.h | 1 + arch/arm64/kvm/sys_regs.c | 223 - include/linux/irqchip/arm-gic-v3.h | 18 ++- virt/kvm/arm/vgic-v3-emul.c| 186 --- 4 files changed, 405 insertions(+), 23 deletions(-) -- 2.4.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/9] arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block
We currently schedule a soft timer every time we exit the guest if the timer did not expire while running the guest. This is really not necessary, because the only work we do in the timer work function is to kick the vcpu. Kicking the vcpu does two things: (1) If the vcpu thread is on a waitqueue, make it runnable and remove it from the waitqueue. (2) If the vcpu is running on a different physical CPU from the one doing the kick, it sends a reschedule IPI. The second case cannot happen, because the soft timer is only ever scheduled when the vcpu is not running. The first case is only relevant when the vcpu thread is on a waitqueue, which is only the case when the vcpu thread has called kvm_vcpu_block(). Therefore, we only need to make sure a timer is scheduled for kvm_vcpu_block(), which we do by encapsulating all calls to kvm_vcpu_block() with kvm_timer_{un}schedule calls. Additionally, we only schedule a soft timer if the timer is enabled and unmasked, since it is useless otherwise. Note that theoretically userspace can use the SET_ONE_REG interface to change registers that should cause the timer to fire, even if the vcpu is blocked without a scheduled timer, but this case was not supported before this patch and we leave it for future work for now. 
Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- arch/arm/include/asm/kvm_host.h | 3 -- arch/arm/kvm/arm.c| 10 + arch/arm64/include/asm/kvm_host.h | 3 -- include/kvm/arm_arch_timer.h | 2 + virt/kvm/arm/arch_timer.c | 89 +-- 5 files changed, 70 insertions(+), 37 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 86fcf6e..dcba0fa 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -236,7 +236,4 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index ce404a5..bdf8871 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu) return kvm_timer_should_fire(vcpu); } +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_schedule(vcpu); +} + +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) +{ + kvm_timer_unschedule(vcpu); +} + int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) { /* Force users to call KVM_ARM_VCPU_INIT */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index dd143f5..415938d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -257,7 +257,4 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); -static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} -static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} - #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h 
index e1e4d7c..ef14cc1 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); bool kvm_timer_should_fire(struct kvm_vcpu *vcpu); +void kvm_timer_schedule(struct kvm_vcpu *vcpu); +void kvm_timer_unschedule(struct kvm_vcpu *vcpu); #endif diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 76e38d2..018f3d6 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt) return HRTIMER_NORESTART; } +static bool kvm_timer_irq_enabled(struct kvm_vcpu *vcpu) +{ + struct arch_timer_cpu *timer = vcpu-arch.timer_cpu; + + return !(timer-cntv_ctl ARCH_TIMER_CTRL_IT_MASK) + (timer-cntv_ctl ARCH_TIMER_CTRL_ENABLE) + !kvm_vgic_get_phys_irq_active(timer-map); +} + bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = vcpu-arch.timer_cpu; cycle_t cval, now; - if ((timer-cntv_ctl ARCH_TIMER_CTRL_IT_MASK) || - !(timer-cntv_ctl ARCH_TIMER_CTRL_ENABLE) || - kvm_vgic_get_phys_irq_active(timer-map)) + if (!kvm_timer_irq_enabled(vcpu)) return false; cval = timer-cntv_cval; @@ -127,24 +134,59 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval = now; } -/** - * kvm_timer_flush_hwstate - prepare to move the virt timer to the
[PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
The arch timer currently uses edge-triggered semantics in the sense that the line is never sampled by the vgic and lowering the line from the timer to the vgic doesn't have any effect on the pending state of virtual interrupts in the vgic. This means that we do not support a guest with the otherwise valid behavior of (1) disable interrupts (2) enable the timer (3) disable the timer (4) enable interrupts. Such a guest would validly not expect to see any interrupts on real hardware, but will see interrupts on KVM. This patch fixes this shortcoming through the following series of changes. First, we change the flow of the timer/vgic sync/flush operations. Now the timer is always flushed/synced before the vgic, because the vgic samples the state of the timer output. This has the implication that we move the timer operations into non-preemptible sections, but that is fine after the previous commit getting rid of hrtimer schedules on every entry/exit. Second, we change the internal behavior of the timer, letting the timer keep track of its previous output state, and only lower/raise the line to the vgic when the state changes. Note that in theory this could have been accomplished more simply by signalling the vgic every time the state *potentially* changed, but we don't want to be hitting the vgic more often than necessary. Third, we get rid of the use of the map-active field in the vgic and instead simply set the interrupt as active on the physical distributor whenever we signal a mapped interrupt to the guest, and we reset the active state when we sync back the HW state from the vgic. Fourth, and finally, we now initialize the timer PPIs (and all the other unused PPIs for now), to be level-triggered, and modify the sync code to sample the line state on HW sync and re-inject a new interrupt if it is still pending at that time. 
Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- arch/arm/kvm/arm.c | 11 +-- include/kvm/arm_arch_timer.h | 2 +- include/kvm/arm_vgic.h | 3 -- virt/kvm/arm/arch_timer.c| 68 +++- virt/kvm/arm/vgic.c | 67 +++ 5 files changed, 81 insertions(+), 70 deletions(-) diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index bdf8871..102a4aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret = 0 || need_new_vmid_gen(vcpu-kvm)) { local_irq_enable(); + kvm_timer_sync_hwstate(vcpu); kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); continue; } @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) kvm_guest_exit(); trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + /* +* We must sync the timer state before the vgic state so that +* the vgic can properly sample the updated state of the +* interrupt line. 
+*/ + kvm_timer_sync_hwstate(vcpu); + kvm_vgic_sync_hwstate(vcpu); preempt_enable(); - kvm_timer_sync_hwstate(vcpu); - ret = handle_exit(vcpu, run, ret); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index ef14cc1..1800227 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -51,7 +51,7 @@ struct arch_timer_cpu { boolarmed; /* Timer IRQ */ - const struct kvm_irq_level *irq; + struct kvm_irq_levelirq; /* VGIC mapping */ struct irq_phys_map *map; diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index d901f1a..99011a0 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -163,7 +163,6 @@ struct irq_phys_map { u32 virt_irq; u32 phys_irq; u32 irq; - boolactive; }; struct irq_phys_map_entry { @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, int virt_irq, int irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); #define irqchip_in_kernel(k) (!!((k)-arch.vgic.in_kernel)) #define vgic_initialized(k)(!!((k)-arch.vgic.nr_cpus)) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index
[PATCH 5/9] arm/arm64: KVM: Use appropriate define in VGIC reset code
We currently initialize the SGIs to be enabled in the VGIC code, but we use the VGIC_NR_PPIS define for this purpose, instead of the more natural VGIC_NR_SGIS. Change this slightly confusing use of the defines. Note: This should have no functional change, as both names are defined to the number 16. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 0ba92d3..8299c24 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -2099,7 +2099,7 @@ int vgic_init(struct kvm *kvm) } for (i = 0; i dist-nr_irqs; i++) { - if (i VGIC_NR_PPIS) + if (i VGIC_NR_SGIS) vgic_bitmap_set_irq_val(dist-irq_enabled, vcpu-vcpu_id, i, 1); if (i VGIC_NR_PRIVATE_IRQS) -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 9/9] arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0
Provide a better quality of implementation and be architecture compliant on ARMv7 for the architected timer by resetting the CNTV_CTL to 0 on reset of the timer, and call kvm_timer_update_state(vcpu) at the same time, ensuring the timer output is not asserted after, for example, a PSCI system reset. This change alone fixes the UEFI reset issue reported by Laszlo back in February. Cc: Laszlo Ersek ler...@redhat.com Cc: Ard Biesheuvel ard.biesheu...@linaro.org Cc: Drew Jones drjo...@redhat.com Cc: Wei Huang w...@redhat.com Cc: Peter Maydell peter.mayd...@linaro.org Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/arch_timer.c | 9 + 1 file changed, 9 insertions(+) diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 747302f..8a0fdfc 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -255,6 +255,15 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, timer-irq.irq = irq-irq; /* +* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 +* and to 0 for ARMv7. We provide an implementation that always +* resets the timer to be disabled and unmasked and is compliant with +* the ARMv7 architecture. +*/ + timer-cntv_ctl = 0; + kvm_timer_update_state(vcpu); + + /* * Tell the VGIC that the virtual interrupt is tied to a * physical interrupt. We do that once per VCPU. */ -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 7/9] arm/arm64: KVM: vgic: Move active state handling to flush_hwstate
We currently set the physical active state only when we *inject* a new pending virtual interrupt, but this is actually not correct, because we could have been preempted and run something else on the system that resets the active state to clear. This causes us to run the VM with the timer set to fire, but without setting the physical active state. The solution is to always check the LR configurations, and we if have a mapped interrupt in th LR in either the pending or active state (virtual), then set the physical active state. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic.c | 42 ++ 1 file changed, 26 insertions(+), 16 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 8299c24..9ed8d53 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1144,26 +1144,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq, struct irq_phys_map *map; map = vgic_irq_map_search(vcpu, irq); - /* -* If we have a mapping, and the virtual interrupt is -* being injected, then we must set the state to -* active in the physical world. Otherwise the -* physical interrupt will fire and the guest will -* exit before processing the virtual interrupt. 
-*/ if (map) { - int ret; - - BUG_ON(!map-active); vlr.hwirq = map-phys_irq; vlr.state |= LR_HW; vlr.state = ~LR_EOI_INT; - ret = irq_set_irqchip_state(map-irq, - IRQCHIP_STATE_ACTIVE, - true); - WARN_ON(ret); - /* * Make sure we're not going to sample this * again, as a HW-backed interrupt cannot be @@ -1255,7 +1240,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) struct vgic_cpu *vgic_cpu = vcpu-arch.vgic_cpu; struct vgic_dist *dist = vcpu-kvm-arch.vgic; unsigned long *pa_percpu, *pa_shared; - int i, vcpu_id; + int i, vcpu_id, lr, ret; int overflow = 0; int nr_shared = vgic_nr_shared_irqs(dist); @@ -1310,6 +1295,31 @@ epilog: */ clear_bit(vcpu_id, dist-irq_pending_on_cpu); } + + for (lr = 0; lr vgic-nr_lr; lr++) { + struct vgic_lr vlr; + + if (!test_bit(lr, vgic_cpu-lr_used)) + continue; + + vlr = vgic_get_lr(vcpu, lr); + + /* +* If we have a mapping, and the virtual interrupt is +* presented to the guest (as pending or active), then we must +* set the state to active in the physical world. See +* Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt. +*/ + if (vlr.state LR_HW) { + struct irq_phys_map *map; + map = vgic_irq_map_search(vcpu, vlr.irq); + + ret = irq_set_irqchip_state(map-irq, + IRQCHIP_STATE_ACTIVE, + true); + WARN_ON(ret); + } + } } static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/9] arm/arm64: Implement GICD_ICFGR as RO for PPIs
The GICD_ICFGR allows the bits for the SGIs and PPIs to be read only. We currently simulate this behavior by writing a hardcoded value to the register for the SGIs and PPIs on every write of these bits to the register (ignoring what the guest actually wrote), and by writing the same value as the reset value to the register. This is a bit counter-intuitive, as the register is RO for these bits, and we can just implement it that way, allowing us to control the value of the bits purely in the reset code. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index c5750be..0ba92d3 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -655,7 +655,7 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio *mmio, ACCESS_READ_VALUE | ACCESS_WRITE_VALUE); if (mmio-is_write) { if (offset 8) { - *reg = ~0U; /* Force PPIs/SGIs to 1 */ + /* Ignore writes to read-only SGI and PPI bits */ return false; } -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/9] KVM: Add kvm_arch_vcpu_{un}blocking callbacks
Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- arch/arm/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/mips/include/asm/kvm_host.h| 2 ++ arch/powerpc/include/asm/kvm_host.h | 2 ++ arch/s390/include/asm/kvm_host.h| 2 ++ arch/x86/include/asm/kvm_host.h | 3 +++ include/linux/kvm_host.h| 2 ++ virt/kvm/kvm_main.c | 3 +++ 8 files changed, 20 insertions(+) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index dcba0fa..86fcf6e 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -236,4 +236,7 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 415938d..dd143f5 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -257,4 +257,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu); void kvm_arm_clear_debug(struct kvm_vcpu *vcpu); void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* __ARM64_KVM_HOST_H__ */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index e8c8d9d..58f0f4d 100644 --- 
a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -845,5 +845,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __MIPS_KVM_HOST_H__ */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d91f65b..179f9a7 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -702,5 +702,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {} static inline void kvm_arch_exit(void) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif /* __POWERPC_KVM_HOST_H__ */ diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 3024acb..04a97df 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -640,5 +640,7 @@ static inline void kvm_arch_memslots_updated(struct kvm *kvm, struct kvm_memslot static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {} static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm, struct kvm_memory_slot *slot) {} +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} #endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 2a7f5d7..26c4086 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1202,4 +1202,7 @@ int 
__x86_set_memory_region(struct kvm *kvm, int x86_set_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem); +static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {} +static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {} + #endif /* _ASM_X86_KVM_HOST_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9564fd7..87d7be6 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -619,6 +619,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void
[PATCH 6/9] arm/arm64: KVM: Add mapped interrupts documentation
Mapped interrupts on arm/arm64 is a tricky concept and the way we deal with them is not apparently easy to understand by reading various specs. Therefore, add a proper documentation file explaining the flow and rationale of the behavior of the vgic. Some of this text was contributed by Marc Zyngier. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt | 59 ++ 1 file changed, 59 insertions(+) create mode 100644 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt new file mode 100644 index 000..49e1357 --- /dev/null +++ b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt @@ -0,0 +1,59 @@ +KVM/ARM VGIC Mapped Interrupts +== + +Setting the Physical Active State for Edge vs. Level Triggered IRQs +--- + +Mapped non-shared interrupts injected to a guest should always mark the +interrupt as active on the physical distributor. + +The reasoning for level-triggered interrupts: +For level-triggered interrupts, we have to mark the interrupt as active +on the physical distributor, because otherwise, as the line remains +asserted, the guest will never execute because the host will keep taking +interrupts. As soon as the guest deactivates the interrupt, the +physical line is sampled by the hardware again and the host takes a new +interrupt if the physical line is still asserted. + +The reasoning for edge-triggered interrupts: +For edge-triggered interrupts, if we set the HW bit in the LR we also +have to mark the interrupt as active on the physical distributor. If we +don't set the physical active bit and the interrupt hits again before +the guest has deactivated the interrupt, the interrupt goes to the host, +which cannot set the state to ACTIVE+PENDING in the LR, because that is +not supported when setting the HW bit in the LR. 
+ +An alternative could be to not use HW bit at all, and inject +edge-triggered interrupts from a physical assigned device as pure +virtual interrupts, but that would potentially slow down handling of the +interrupt in the guest, because a physical interrupt occurring in the +middle of the guest ISR would preempt the guest for the host to handle +the interrupt. + + +Life Cycle for Forwarded Physical Interrupts + + +By forwarded physical interrupts we mean interrupts presented to a guest +representing a real HW event originally signaled to the host as a +physical interrupt and injecting this as a virtual interrupt with the HW +bit set in the LR. + +The state of such an interrupt is managed in the following way: + + - LR.Pending must be set when the interrupt is first injected, because this +is the only way the GICV interface is going to present it to the guest. + - LR.Pending will stay set as long as the guest has not acked the interrupt. + - LR.Pending transitions to LR.Active on read of IAR, as expected. + - On EOI, the *physical distributor* active bit gets cleared, but the +LR.Active is left untouched - it looks like the GIC can only clear a +single bit (either the virtual active, or the physical one). + - This means we cannot trust LR.Active to find out about the state of the +interrupt, and we definitely need to look at the distributor version. + +Consequently, when we context switch the state of a VCPU with forwarded +physical interrupts, we must context switch set pending *or* active bits in the +LR for that VCPU until the guest has deactivated the physical interrupt, and +then clear the corresponding bits in the LR. If we ever set an LR to pending or +mapped when switching in a VCPU for a forwarded physical interrupt, we must also +set the active state on the *physical distributor*. 
-- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/9] arm/arm64: KVM: vgic: Factor out level irq processing on guest exit
Currently vgic_process_maintenance() processes dealing with a completed level-triggered interrupt directly, but we are soon going to reuse this logic for level-triggered mapped interrupts with the HW bit set, so move this logic into a separate static function. Probably the most scary part of this commit is convincing yourself that the current flow is safe compared to the old one. In the following I try to list the changes and why they are harmless: Move vgic_irq_clear_queued after kvm_notify_acked_irq: Harmless because the effect of clearing the queued flag wrt. kvm_set_irq is only that vgic_update_irq_pending does not set the pending bit on the emulated CPU interface or in the pending_on_cpu bitmask, but we set this in __kvm_vgic_sync_hwstate later on if the level is stil high. Move vgic_set_lr before kvm_notify_acked_irq: Also, harmless because the LR are cpu-local operations and kvm_notify_acked only affects the dist Move vgic_dist_irq_clear_soft_pend after kvm_notify_acked_irq: Also harmless because it's just a bit which is cleared and altering the line state does not affect this bit. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/vgic.c | 88 ++--- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index 9eb489a..c5750be 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -1312,12 +1312,56 @@ epilog: } } +static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) +{ + int level_pending = 0; + + vlr.state = 0; + vlr.hwirq = 0; + vgic_set_lr(vcpu, lr, vlr); + + /* +* If the IRQ was EOIed (called from vgic_process_maintenance) or it +* went from active to non-active (called from vgic_sync_hwirq) it was +* also ACKed and we we therefore assume we can clear the soft pending +* state (should it had been set) for this interrupt. 
+* +* Note: if the IRQ soft pending state was set after the IRQ was +* acked, it actually shouldn't be cleared, but we have no way of +* knowing that unless we start trapping ACKs when the soft-pending +* state is set. +*/ + vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); + + /* +* Tell the gic to start sampling the line of this interrupt again. +*/ + vgic_irq_clear_queued(vcpu, vlr.irq); + + /* Any additional pending interrupt? */ + if (vgic_dist_irq_get_level(vcpu, vlr.irq)) { + vgic_cpu_irq_set(vcpu, vlr.irq); + level_pending = 1; + } else { + vgic_dist_irq_clear_pending(vcpu, vlr.irq); + vgic_cpu_irq_clear(vcpu, vlr.irq); + } + + /* +* Despite being EOIed, the LR may not have +* been marked as empty. +*/ + vgic_sync_lr_elrsr(vcpu, lr, vlr); + + return level_pending; +} + static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) { u32 status = vgic_get_interrupt_status(vcpu); struct vgic_dist *dist = vcpu-kvm-arch.vgic; - bool level_pending = false; struct kvm *kvm = vcpu-kvm; + int level_pending = 0; kvm_debug(STATUS = %08x\n, status); @@ -1332,54 +1376,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) for_each_set_bit(lr, eisr_ptr, vgic-nr_lr) { struct vgic_lr vlr = vgic_get_lr(vcpu, lr); - WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); - spin_lock(dist-lock); - vgic_irq_clear_queued(vcpu, vlr.irq); + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); WARN_ON(vlr.state LR_STATE_MASK); - vlr.state = 0; - vgic_set_lr(vcpu, lr, vlr); - /* -* If the IRQ was EOIed it was also ACKed and we we -* therefore assume we can clear the soft pending -* state (should it had been set) for this interrupt. -* -* Note: if the IRQ soft pending state was set after -* the IRQ was acked, it actually shouldn't be -* cleared, but we have no way of knowing that unless -* we start trapping ACKs when the soft-pending state -* is set. -*/ - vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq); /* * kvm_notify_acked_irq calls kvm_set_irq() -* to reset the IRQ level. 
Need to release the -* lock for kvm_set_irq to grab it. +* to reset the IRQ level, which grabs the dist-lock +
[PATCH 1/2] arm/arm64: KVM: Add tracepoints for vgic and timer
The VGIC and timer code for KVM arm/arm64 doesn't have any tracepoints or tracepoint infrastructure defined. Rewriting some of the timer code handling showed me how much we need this, so let's add these simple trace points once and for all and we can easily expand with additional trace points in these files as we go along. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- virt/kvm/arm/arch_timer.c | 4 ++ virt/kvm/arm/trace.h | 97 +++ virt/kvm/arm/vgic.c | 9 + 3 files changed, 110 insertions(+) create mode 100644 virt/kvm/arm/trace.h diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 8a0fdfc..f63b208 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -28,6 +28,8 @@ #include kvm/arm_vgic.h #include kvm/arm_arch_timer.h +#include trace.h + static struct timecounter *timecounter; static struct workqueue_struct *wqueue; static unsigned int host_vtimer_irq; @@ -128,6 +130,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu) BUG_ON(!vgic_initialized(vcpu-kvm)); + trace_kvm_timer_update_irq(vcpu-vcpu_id, timer-map-virt_irq, + timer-irq-level); ret = kvm_vgic_inject_mapped_irq(vcpu-kvm, vcpu-vcpu_id, timer-map, timer-irq.level); diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h new file mode 100644 index 000..48c3c90 --- /dev/null +++ b/virt/kvm/arm/trace.h @@ -0,0 +1,97 @@ +#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_KVM_H + +#include linux/tracepoint.h + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM kvm + +/* + * Tracepoints for vgic + */ +TRACE_EVENT(kvm_vgic_set_irqchip_active, + TP_PROTO(unsigned long vcpu_id, __u32 irq), + TP_ARGS(vcpu_id, irq), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_id ) + __field(__u32, irq ) + ), + + TP_fast_assign( + __entry-vcpu_id= vcpu_id; + __entry-irq= irq; + ), + + TP_printk(VCPU: %ld, IRQ %d, __entry-vcpu_id, __entry-irq) +); + +TRACE_EVENT(kvm_vgic_clear_irqchip_active, + TP_PROTO(unsigned long vcpu_id, __u32 irq), + 
TP_ARGS(vcpu_id, irq), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_id ) + __field(__u32, irq ) + ), + + TP_fast_assign( + __entry-vcpu_id= vcpu_id; + __entry-irq= irq; + ), + + TP_printk(VCPU: %ld, IRQ %d, __entry-vcpu_id, __entry-irq) +); + +TRACE_EVENT(vgic_update_irq_pending, + TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_id ) + __field(__u32, irq ) + __field(bool, level ) + ), + + TP_fast_assign( + __entry-vcpu_id= vcpu_id; + __entry-irq= irq; + __entry-level = level; + ), + + TP_printk(VCPU: %ld, IRQ %d, level: %d, + __entry-vcpu_id, __entry-irq, __entry-level) +); + +/* + * Tracepoints for arch_timer + */ +TRACE_EVENT(kvm_timer_inject_irq, + TP_PROTO(unsigned long vcpu_id, __u32 irq, int level), + TP_ARGS(vcpu_id, irq, level), + + TP_STRUCT__entry( + __field(unsigned long, vcpu_id ) + __field(__u32, irq ) + __field(int,level ) + ), + + TP_fast_assign( + __entry-vcpu_id= vcpu_id; + __entry-irq= irq; + __entry-level = level; + ), + + TP_printk(VCPU: %ld, IRQ %d, level %d, + __entry-vcpu_id, __entry-irq, __entry-level) +); + +#endif /* _TRACE_KVM_H */ + +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE trace + +/* This part must be outside protection */ +#include trace/define_trace.h diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c index f4ea950..45c95a0 100644 --- a/virt/kvm/arm/vgic.c +++ b/virt/kvm/arm/vgic.c @@ -34,6 +34,9 @@ #include asm/kvm.h #include kvm/iodev.h +#define CREATE_TRACE_POINTS +#include trace.h + /* * How the whole thing works (courtesy of Christoffer Dall): * @@ -1314,6 +1317,8 @@ epilog: struct irq_phys_map *map; map = vgic_irq_map_search(vcpu, vlr.irq); + trace_kvm_vgic_set_irqchip_active(vcpu-vcpu_id, + vlr.irq); ret =
[PATCH 0/2] Improve and add tracepoints for KVM on arm/arm64
The timer and vgic code didn't have tracepoints for quite a while and we've been adding those ad-hoc when doing development a lot of times. Add some simple tracepoints for those parts of KVM to get the infrastructure in place. Also improve the kvm_exit tracepoint on arm/arm64 to print something meaningful and be much less misleading compared to what we have now. This series depends on the Rework architected timer and fix UEFI reset series sent earlier. It is also available here: https://git.linaro.org/people/christoffer.dall/linux-kvm-arm.git tracing-fixup I borrowed some of this code from Alex Bennée, thanks! Christoffer Dall (2): arm/arm64: KVM: Add tracepoints for vgic and timer arm/arm64: KVM: Improve kvm_exit tracepoint arch/arm/include/asm/kvm_arm.h | 20 + arch/arm/kvm/arm.c | 2 +- arch/arm/kvm/trace.h | 10 +++-- arch/arm64/include/asm/kvm_arm.h | 16 +++ virt/kvm/arm/arch_timer.c| 4 ++ virt/kvm/arm/trace.h | 97 virt/kvm/arm/vgic.c | 9 7 files changed, 154 insertions(+), 4 deletions(-) create mode 100644 virt/kvm/arm/trace.h -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] arm/arm64: KVM: Improve kvm_exit tracepoint
The ARM architecture only saves the exit class to the HSR (ESR_EL2 for arm64) on synchronous exceptions, not on asynchronous exceptions like an IRQ. However, we only report the exception class on kvm_exit, which is confusing because an IRQ looks like it exited at some PC with the same reason as the previous exit. Add a lookup table for the exception index and prepend the kvm_exit tracepoint text with the exception type to clarify this situation. Also resolve the exception class (EC) to a human-friendly text version so the trace output becomes immediately usable for debugging this code. Signed-off-by: Christoffer Dall christoffer.d...@linaro.org --- arch/arm/include/asm/kvm_arm.h | 20 arch/arm/kvm/arm.c | 2 +- arch/arm/kvm/trace.h | 10 +++--- arch/arm64/include/asm/kvm_arm.h | 16 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index d995821..dc641dd 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -218,4 +218,24 @@ #define HSR_DABT_CM(1U 8) #define HSR_DABT_EA(1U 9) +#define kvm_arm_exception_type \ + {0, RESET }, \ + {1, UNDEFINED }, \ + {2, SOFTWARE }, \ + {3, PREF_ABORT }, \ + {4, DATA_ABORT }, \ + {5, IRQ },\ + {6, FIQ },\ + {7, HVC } + +#define HSRECN(x) { HSR_EC_##x, #x } + +#define kvm_arm_exception_class \ + HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \ + HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \ + HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \ + HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \ + HSRECN(DABT), HSRECN(DABT_HYP) + + #endif /* __ARM_KVM_ARM_H__ */ diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 102a4aa..ffec2f2 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -606,7 +606,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) * guest time. 
*/ kvm_guest_exit(); - trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); + trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); /* * We must sync the timer state before the vgic state so that diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h index 0ec3539..c25a885 100644 --- a/arch/arm/kvm/trace.h +++ b/arch/arm/kvm/trace.h @@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry, ); TRACE_EVENT(kvm_exit, - TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc), - TP_ARGS(exit_reason, vcpu_pc), + TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc), + TP_ARGS(idx, exit_reason, vcpu_pc), TP_STRUCT__entry( + __field(int,idx ) __field(unsigned int, exit_reason ) __field(unsigned long, vcpu_pc ) ), TP_fast_assign( + __entry-idx= idx; __entry-exit_reason= exit_reason; __entry-vcpu_pc= vcpu_pc; ), - TP_printk(HSR_EC: 0x%04x, PC: 0x%08lx, + TP_printk(%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx, + __print_symbolic(__entry-idx, kvm_arm_exception_type), __entry-exit_reason, + __print_symbolic(__entry-exit_reason, kvm_arm_exception_class), __entry-vcpu_pc) ); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 7605e09..ffb86bf 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -197,4 +197,20 @@ /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) +#define kvm_arm_exception_type \ + {0, IRQ },\ + {1, TRAP } + +#define ECN(x) { ESR_ELx_EC_##x, #x } + +#define kvm_arm_exception_class \ + ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \ + ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \ + ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \ + ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \ + ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \ + ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \ + ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ + ECN(BKPT32), 
ECN(VECTOR32), ECN(BRK64) + #endif /* __ARM64_KVM_ARM_H__ */ -- 2.1.2.330.g565301e.dirty -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH V3 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses
On 08/26/2015 01:10 PM, Jason Wang wrote: On 08/25/2015 07:51 PM, Michael S. Tsirkin wrote: On Tue, Aug 25, 2015 at 05:05:47PM +0800, Jason Wang wrote: We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS and another is KVM_FAST_MMIO_BUS. This leads to issue: - kvm_io_bus_destroy() knows nothing about the devices on two buses points to a single dev. Which will lead double free [1] during exit. - wildcard eventfd ignores data len, so it was registered as a kvm_io_range with zero length. This will fail the binary search in kvm_io_bus_get_first_dev() when we try to emulate through KVM_MMIO_BUS. This will cause userspace io emulation request instead of an eventfd notification (virtqueue kick will be trapped by qemu instead of vhost in this case). Fixing this by don't register wildcard mmio eventfd on two buses. Instead, only register it in KVM_FAST_MMIO_BUS. This fixes the double free issue of kvm_io_bus_destroy(). For the arch/setups that does not utilize KVM_FAST_MMIO_BUS, before searching KVM_MMIO_BUS, try KVM_FAST_MMIO_BUS first to see if it has a match.
[1] Panic caused by double free: CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic #28-Ubuntu Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 09/12/2013 task: 88009ae0c4b0 ti: 88020e7f task.ti: 88020e7f RIP: 0010:[c07e25d8] [c07e25d8] ioeventfd_release+0x28/0x60 [kvm] RSP: 0018:88020e7f3bc8 EFLAGS: 00010292 RAX: dead00200200 RBX: 8801ec19c900 RCX: 00018200016d RDX: 8801ec19cf80 RSI: ea0008bf1d40 RDI: 8801ec19c900 RBP: 88020e7f3bd8 R08: 2fc75a01 R09: 00018200016d R10: c07df6ae R11: 88022fc75a98 R12: 88021e7cc000 R13: 88021e7cca48 R14: 88021e7cca50 R15: 8801ec19c880 FS: 7fc1ee3e6700() GS:88023e24() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 7f8f389d8000 CR3: 00023dc13000 CR4: 001427e0 Stack: 88021e7cc000 88020e7f3be8 c07e2622 88020e7f3c38 c07df69a 880232524160 88020e792d80 880219b78c00 0008 8802321686a8 Call Trace: [c07e2622] ioeventfd_destructor+0x12/0x20 [kvm] [c07df69a] kvm_put_kvm+0xca/0x210 [kvm] [c07df818] kvm_vcpu_release+0x18/0x20 [kvm] [811f69f7] __fput+0xe7/0x250 [811f6bae] fput+0xe/0x10 [81093f04] task_work_run+0xd4/0xf0 [81079358] do_exit+0x368/0xa50 [81082c8f] ? recalc_sigpending+0x1f/0x60 [81079ad5] do_group_exit+0x45/0xb0 [81085c71] get_signal+0x291/0x750 [810144d8] do_signal+0x28/0xab0 [810f3a3b] ? do_futex+0xdb/0x5d0 [810b7028] ? __wake_up_locked_key+0x18/0x20 [810f3fa6] ? SyS_futex+0x76/0x170 [81014fc9] do_notify_resume+0x69/0xb0 [817cb9af] int_signal+0x12/0x17 Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 48 89 10 48 b8 00 01 10 00 00 RIP [c07e25d8] ioeventfd_release+0x28/0x60 [kvm] RSP 88020e7f3bc8 Cc: Gleb Natapov g...@kernel.org Cc: Paolo Bonzini pbonz...@redhat.com Cc: Michael S. Tsirkin m...@redhat.com Signed-off-by: Jason Wang jasow...@redhat.com --- Changes from V2: - Tweak styles and comment suggested by Cornelia. 
Changes from v1: - change ioeventfd_bus_from_flags() to return KVM_FAST_MMIO_BUS when needed to save lots of unnecessary changes. --- virt/kvm/eventfd.c | 31 +-- virt/kvm/kvm_main.c | 16 ++-- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 9ff4193..c3ffdc3 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -762,13 +762,16 @@ ioeventfd_check_collision(struct kvm *kvm, struct _ioeventfd *p) return false; } -static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags) +static enum kvm_bus ioeventfd_bus_from_args(struct kvm_ioeventfd *args) { - if (flags KVM_IOEVENTFD_FLAG_PIO) + if (args-flags KVM_IOEVENTFD_FLAG_PIO) return KVM_PIO_BUS; - if (flags KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) + if (args-flags KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY) return KVM_VIRTIO_CCW_NOTIFY_BUS; - return KVM_MMIO_BUS; + /* When length is ignored, MMIO is put on a separate bus, for + * faster lookups. + */ + return args-len ? KVM_MMIO_BUS : KVM_FAST_MMIO_BUS; } static int @@ -779,7 +782,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct kvm_ioeventfd *args)
Fwd: Data buffer Transfer through Hypercall
Hi All, Does anyone know how to transfer data buffer through Hypercall? According to the current implementation from kvm_emulate_hypercall, it only takes a primitive type as parameters through different registers. Can we use hypercall like read/write system call to transfer data between guest and hypervisor? Is virtio the best way to communicate between guest and host at the moment? If that's the case, which virtio device will be the best? Thanks, Yaohui -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC 1/3] vmx: allow ioeventfd for EPT violations
On 08/30/2015 05:12 PM, Michael S. Tsirkin wrote: Even when we skip data decoding, MMIO is slightly slower than port IO because it uses the page-tables, so the CPU must do a pagewalk on each access. This overhead is normally masked by using the TLB cache: but not so for KVM MMIO, where PTEs are marked as reserved and so are never cached. As ioeventfd memory is never read, make it possible to use RO pages on the host for ioeventfds, instead. I like this idea. The result is that TLBs are cached, which finally makes MMIO as fast as port IO. What does TLBs are cached mean? Even after applying the patch no new TLB type can be cached. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- arch/x86/kvm/vmx.c | 5 + 1 file changed, 5 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9d1bfd3..ed44026 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5745,6 +5745,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); + if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) { + skip_emulated_instruction(vcpu); + return 1; + } + I am afraid that the common page fault entry point is not a good place to do the work. Would move it to kvm_handle_bad_page()? The different is the workload of fast_page_fault() is included but it's light enough and MMIO-exit should not be very frequent, so i think it's okay. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html