[PATCH 2/2] kvm/x86: use __test_bit

2015-08-30 Thread Michael S. Tsirkin
Let compiler do slightly better optimizations using
the non-volatile __test_bit in all cases where
the values are set using the non-volatile __set_bit and
__clear_bit.

I left test_bit in place where the mask is set using
the atomic set_bit/clear_bit, for symmetry.

This shaves about 100 bytes off the kernel size:

before:
134868 29978372  146237   23b3d arch/x86/kvm/kvm-intel.ko
34312947640 441  391210   5f82a arch/x86/kvm/kvm.ko
after:
134836  29978372  146205   23b1d arch/x86/kvm/kvm-intel.ko
343017 47640 441  391098   5f7ba arch/x86/kvm/kvm.ko

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 arch/x86/kvm/ioapic.h |  2 +-
 arch/x86/kvm/kvm_cache_regs.h |  6 +++---
 arch/x86/kvm/ioapic.c |  2 +-
 arch/x86/kvm/pmu_intel.c  |  2 +-
 arch/x86/kvm/vmx.c| 18 +-
 arch/x86/kvm/x86.c|  2 +-
 6 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/ioapic.h b/arch/x86/kvm/ioapic.h
index ca0b0b4..3b58d41 100644
--- a/arch/x86/kvm/ioapic.h
+++ b/arch/x86/kvm/ioapic.h
@@ -102,7 +102,7 @@ static inline bool kvm_ioapic_handles_vector(struct kvm *kvm, int vector)
 {
	struct kvm_ioapic *ioapic = kvm->arch.vioapic;
	smp_rmb();
-	return test_bit(vector, ioapic->handled_vectors);
+	return __test_bit(vector, ioapic->handled_vectors);
 }
 
 void kvm_rtc_eoi_tracking_restore_one(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h
index e1e89ee..21ef6d6 100644
--- a/arch/x86/kvm/kvm_cache_regs.h
+++ b/arch/x86/kvm/kvm_cache_regs.h
@@ -9,7 +9,7 @@
 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu,
  enum kvm_reg reg)
 {
-	if (!test_bit(reg, (unsigned long *)vcpu->arch.regs_avail))
+	if (!__test_bit(reg, (unsigned long *)vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, reg);

	return vcpu->arch.regs[reg];
@@ -38,7 +38,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int 
index)
 {
might_sleep();  /* on svm */
 
-	if (!test_bit(VCPU_EXREG_PDPTR,
+	if (!__test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)vcpu->arch.regs_avail))
		kvm_x86_ops->cache_reg(vcpu, VCPU_EXREG_PDPTR);
 
@@ -68,7 +68,7 @@ static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, 
ulong mask)
 
 static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu)
 {
-	if (!test_bit(VCPU_EXREG_CR3, (ulong *)vcpu->arch.regs_avail))
+	if (!__test_bit(VCPU_EXREG_CR3, (ulong *)vcpu->arch.regs_avail))
		kvm_x86_ops->decache_cr3(vcpu);
	return vcpu->arch.cr3;
 }
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index 856f791..bf2afa5 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -117,7 +117,7 @@ static void __rtc_irq_eoi_tracking_restore_one(struct 
kvm_vcpu *vcpu)
return;
 
	new_val = kvm_apic_pending_eoi(vcpu, e->fields.vector);
-	old_val = test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
+	old_val = __test_bit(vcpu->vcpu_id, ioapic->rtc_status.dest_map);
 
if (new_val == old_val)
return;
diff --git a/arch/x86/kvm/pmu_intel.c b/arch/x86/kvm/pmu_intel.c
index ab38af4..fb20a0f 100644
--- a/arch/x86/kvm/pmu_intel.c
+++ b/arch/x86/kvm/pmu_intel.c
@@ -98,7 +98,7 @@ static bool intel_pmc_is_enabled(struct kvm_pmc *pmc)
 {
struct kvm_pmu *pmu = pmc_to_pmu(pmc);
 
-	return test_bit(pmc->idx, (unsigned long *)pmu->global_ctrl);
+	return __test_bit(pmc->idx, (unsigned long *)pmu->global_ctrl);
 }
 
 static struct kvm_pmc *intel_pmc_idx_to_pmc(struct kvm_pmu *pmu, int pmc_idx)
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index c117703..ed44026 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2025,7 +2025,7 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
 {
unsigned long rflags, save_rflags;
 
-	if (!test_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu->arch.regs_avail)) {
+	if (!__test_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu->arch.regs_avail)) {
		__set_bit(VCPU_EXREG_RFLAGS, (ulong *)vcpu->arch.regs_avail);
		rflags = vmcs_readl(GUEST_RFLAGS);
		if (to_vmx(vcpu)->rmode.vm86_active) {
@@ -3478,7 +3478,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
 {
struct kvm_mmu *mmu = vcpu-arch.walk_mmu;
 
-	if (!test_bit(VCPU_EXREG_PDPTR,
+	if (!__test_bit(VCPU_EXREG_PDPTR,
		      (unsigned long *)vcpu->arch.regs_dirty))
		return;
 
@@ -3513,7 +3513,7 @@ static void ept_update_paging_mode_cr0(unsigned long 
*hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
 {
-	if (!test_bit(VCPU_EXREG_CR3, (ulong *)vcpu->arch.regs_avail))
+	if (!__test_bit(VCPU_EXREG_CR3, (ulong *)vcpu->arch.regs_avail))

[PATCH RFC 0/3] pci-testdev add support for kvm ioeventfd pf

2015-08-30 Thread Michael S. Tsirkin
This adds a test for triggering ioeventfd on pagefaults.
This was used to verify that mmio ioeventfd on pagefault is
as fast as portio.

Michael S. Tsirkin (3):
  pci-testdev: separate page for each mmio test
  pci-testdev: add subregion
  pci-testdev: add RO pages for ioeventfd

 hw/misc/pci-testdev.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 3/3] pci-testdev: add RO pages for ioeventfd

2015-08-30 Thread Michael S. Tsirkin
This seems hackish - would it be better to create this region
automatically within kvm? Suggestions are welcome.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/misc/pci-testdev.c | 13 +
 1 file changed, 13 insertions(+)

diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index 94141a3..55efc32 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -21,6 +21,7 @@
 #include "hw/pci/pci.h"
 #include "qemu/event_notifier.h"
 #include "qemu/osdep.h"
+#include <sys/mman.h>
 
 typedef struct PCITestDevHdr {
 uint8_t test;
@@ -82,11 +83,13 @@ typedef struct PCITestDevState {
 PCIDevice parent_obj;
 /* public */
 
+MemoryRegion zeromr;
 MemoryRegion mmio;
 MemoryRegion mbar;
 MemoryRegion portio;
 IOTest *tests;
 int current;
+void *zero;
 } PCITestDevState;
 
 #define TYPE_PCI_TEST_DEV pci-testdev
@@ -242,6 +245,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 uint8_t *pci_conf;
 char *name;
 int r, i;
+    d->zero = mmap(NULL, IOTEST_MEMSIZE * 2, PROT_READ,
+                   MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+    memory_region_init_ram_ptr(&d->zeromr, OBJECT(d), "pci-testdev-zero",
+                               0x1000, d->zero);
+    memory_region_set_readonly(&d->zeromr, true);
 
 pci_conf = pci_dev-config;
 
@@ -286,6 +294,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 test-hasnotifier = false;
 continue;
 }
+
+        if (test->hasnotifier && !test->size) {
+            memory_region_add_subregion_overlap(&d->mbar, le32_to_cpu(test->hdr->offset),
+                                                &d->zeromr, 2 /* prio */);
+        }
         r = event_notifier_init(&test->notifier, 0);
         assert(r >= 0);
         test->hasnotifier = true;
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 2/3] pci-testdev: add subregion

2015-08-30 Thread Michael S. Tsirkin
Make mmio a subregion of the BAR. 
This will allow mapping rom within the same BAR down the road.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/misc/pci-testdev.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index 6edc1cd..94141a3 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -83,6 +83,7 @@ typedef struct PCITestDevState {
 /* public */
 
 MemoryRegion mmio;
+MemoryRegion mbar;
 MemoryRegion portio;
 IOTest *tests;
 int current;
@@ -248,9 +249,13 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 
     memory_region_init_io(&d->mmio, OBJECT(d), &pci_testdev_mmio_ops, d,
                           "pci-testdev-mmio", IOTEST_MEMSIZE * 2);
+    memory_region_init(&d->mbar, OBJECT(d),
+                       "pci-testdev-mmio", IOTEST_MEMSIZE * 2);
     memory_region_init_io(&d->portio, OBJECT(d), &pci_testdev_pio_ops, d,
                           "pci-testdev-portio", IOTEST_IOSIZE * 2);
-    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mmio);
+
+    memory_region_add_subregion_overlap(&d->mbar, 0, &d->mmio, 1 /* prio */);
+    pci_register_bar(pci_dev, 0, PCI_BASE_ADDRESS_SPACE_MEMORY, &d->mbar);
     pci_register_bar(pci_dev, 1, PCI_BASE_ADDRESS_SPACE_IO, &d->portio);
 
 d-current = -1;
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 1/3] pci-testdev: separate page for each mmio test

2015-08-30 Thread Michael S. Tsirkin
note: this makes BAR > 4K, which requires kvm unit test
patch to support such BAR. Do we need to worry about
old kvm unit test binaries? I'm guessing not ...

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 hw/misc/pci-testdev.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
index b6e11d6..6edc1cd 100644
--- a/hw/misc/pci-testdev.c
+++ b/hw/misc/pci-testdev.c
@@ -48,7 +48,7 @@ typedef struct IOTest {
 #define IOTEST_NODATA  0xAB
 
 #define IOTEST_IOSIZE 128
-#define IOTEST_MEMSIZE 2048
+#define IOTEST_MEMSIZE 0x10000
 
 static const char *iotest_test[] = {
 no-eventfd,
@@ -262,7 +262,7 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 test-hdr = g_malloc0(test-bufsize);
 memcpy(test-hdr-name, name, strlen(name) + 1);
 g_free(name);
-    test->hdr->offset = cpu_to_le32(IOTEST_SIZE(i) + i * IOTEST_ACCESS_WIDTH);
+    test->hdr->offset = cpu_to_le32(IOTEST_SIZE(i) + i * (IOTEST_IS_MEM(i) ? 0x1000 : IOTEST_ACCESS_WIDTH));
     test->size = strcmp(IOTEST_TEST(i), "nodata-eventfd") ? IOTEST_ACCESS_WIDTH : 0;

     test->match_data = strcmp(IOTEST_TEST(i), "wildcard-eventfd");
@@ -273,6 +273,7 @@ static void pci_testdev_realize(PCIDevice *pci_dev, Error 
**errp)
 test-mr = IOTEST_REGION(d, i);
 
 if (!test-size  !IOTEST_IS_MEM(i)) {
+test-hdr-width = 0;
 test-hasnotifier = false;
 continue;
 }
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 2/3] svm: allow ioeventfd for NPT page faults

2015-08-30 Thread Michael S. Tsirkin
MMIO is slightly slower than port IO because it uses the page-tables, so
the CPU must do a pagewalk on each access.

This overhead is normally masked by using the TLB cache:
but not so for KVM MMIO, where PTEs are marked as reserved
and so are never cached.

As ioeventfd memory is never read, make it possible to use
RO pages on the host for ioeventfds, instead.
The result is that TLBs are cached, which finally makes MMIO
as fast as port IO.

Warning: untested.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 arch/x86/kvm/svm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 8e0c084..6422fac 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1812,6 +1812,11 @@ static int pf_interception(struct vcpu_svm *svm)
switch (svm-apf_reason) {
default:
 		error_code = svm->vmcb->control.exit_info_1;
+		if (!kvm_io_bus_write(&svm->vcpu, KVM_FAST_MMIO_BUS,
+				      fault_address, 0, NULL)) {
+			skip_emulated_instruction(&svm->vcpu);
+			return 1;
+		}

 		trace_kvm_page_fault(fault_address, error_code);
 		if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 3/3] kvm: add KVM_CAP_IOEVENTFD_PF capability

2015-08-30 Thread Michael S. Tsirkin
Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 include/uapi/linux/kvm.h  | 1 +
 arch/x86/kvm/x86.c| 1 +
 Documentation/virtual/kvm/api.txt | 7 +++
 3 files changed, 9 insertions(+)

diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 716ad4a..4509aa3 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -817,6 +817,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_DISABLE_QUIRKS 116
 #define KVM_CAP_X86_SMM 117
 #define KVM_CAP_MULTI_ADDRESS_SPACE 118
+#define KVM_CAP_IOEVENTFD_PF 119
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8015fa..f989453 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2629,6 +2629,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
ext)
case KVM_CAP_IRQ_INJECT_STATUS:
case KVM_CAP_IOEVENTFD:
case KVM_CAP_IOEVENTFD_NO_LENGTH:
+   case KVM_CAP_IOEVENTFD_PF:
case KVM_CAP_PIT2:
case KVM_CAP_PIT_STATE2:
case KVM_CAP_SET_IDENTITY_MAP_ADDR:
diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index a7926a9..85a76ad 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -1618,6 +1618,13 @@ The following flags are defined:
 If datamatch flag is set, the event will be signaled only if the written value
 to the registered address is equal to datamatch in struct kvm_ioeventfd.
 
+If KVM_CAP_IOEVENTFD_NO_LENGTH is present, and when DATAMATCH flag
+is clear, len can be set to 0 to match access of any length.
+
+If KVM_CAP_IOEVENTFD_PF is present, and when DATAMATCH flag
+is clear and len is set to 0, the specified address can overlap
+a read-only memory region (as opposed to an MMIO region).
+
 For virtio-ccw devices, addr contains the subchannel id and datamatch the
 virtqueue index.
 
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 1/3] vmx: allow ioeventfd for EPT violations

2015-08-30 Thread Michael S. Tsirkin
Even when we skip data decoding, MMIO is slightly slower
than port IO because it uses the page-tables, so the CPU
must do a pagewalk on each access.

This overhead is normally masked by using the TLB cache:
but not so for KVM MMIO, where PTEs are marked as reserved
and so are never cached.

As ioeventfd memory is never read, make it possible to use
RO pages on the host for ioeventfds, instead.
The result is that TLBs are cached, which finally makes MMIO
as fast as port IO.

Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
 arch/x86/kvm/vmx.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9d1bfd3..ed44026 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5745,6 +5745,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 
GUEST_INTR_STATE_NMI);
 
gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+   if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+   skip_emulated_instruction(vcpu);
+   return 1;
+   }
+
trace_kvm_page_fault(gpa, exit_qualification);
 
/* It is a write fault? */
-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 3/3] pci-testdev: add RO pages for ioeventfd

2015-08-30 Thread Gonglei
On 2015/8/30 17:20, Michael S. Tsirkin wrote:
 This seems hackish - would it be better to create this region
 automatically within kvm? Suggestions are welcome.
 
 Signed-off-by: Michael S. Tsirkin m...@redhat.com
 ---
  hw/misc/pci-testdev.c | 13 +
  1 file changed, 13 insertions(+)
 
 diff --git a/hw/misc/pci-testdev.c b/hw/misc/pci-testdev.c
 index 94141a3..55efc32 100644
 --- a/hw/misc/pci-testdev.c
 +++ b/hw/misc/pci-testdev.c
 @@ -21,6 +21,7 @@
  #include hw/pci/pci.h
  #include qemu/event_notifier.h
  #include qemu/osdep.h
 +#include sys/mman.h
  
  typedef struct PCITestDevHdr {
  uint8_t test;
 @@ -82,11 +83,13 @@ typedef struct PCITestDevState {
  PCIDevice parent_obj;
  /* public */
  
 +MemoryRegion zeromr;
  MemoryRegion mmio;
  MemoryRegion mbar;
  MemoryRegion portio;
  IOTest *tests;
  int current;
 +void *zero;
  } PCITestDevState;
  
  #define TYPE_PCI_TEST_DEV pci-testdev
 @@ -242,6 +245,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, 
 Error **errp)
  uint8_t *pci_conf;
  char *name;
  int r, i;
 +d-zero = mmap(NULL, IOTEST_MEMSIZE * 2, PROT_READ,
 + MAP_SHARED | MAP_ANONYMOUS, -1, 0);
 +

Do we need think about hotplugging pci-testdev ? If yes, then we should release 
some resources
when hot-unplug a pci-testdev device:
munmap(d-zero, ...)
memory_region_del_subregion(d-mbar, d-mmio)
...

Regards,
-Gonglei

 +memory_region_init_ram_ptr(d-zeromr, OBJECT(d), pci-testdev-zero, 
 0x1000, d-zero);
 +memory_region_set_readonly(d-zeromr, true);
  
  pci_conf = pci_dev-config;
  
 @@ -286,6 +294,11 @@ static void pci_testdev_realize(PCIDevice *pci_dev, 
 Error **errp)
  test-hasnotifier = false;
  continue;
  }
 +
 +if (test-hasnotifier  !test-size) {
 +memory_region_add_subregion_overlap(d-mbar, 
 le32_to_cpu(test-hdr-offset),
 +d-zeromr, 2 /* prio */);
 +}
  r = event_notifier_init(test-notifier, 0);
  assert(r = 0);
  test-hasnotifier = true;
 


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH RFC 0/3] kvm add ioeventfd pf capability

2015-08-30 Thread Michael S. Tsirkin
One of the reasons MMIO is slower than port IO is
because it requires a page table lookup.
For normal memory accesses, this is solved by using the TLB
cache - but MMIO entries are either not present or reserved
and so are never cached.

To fix, allow installing an ioeventfd on top of a read only
memory region, which allows the CPU to cache the translations.

Warning: svm patch is untested.

Michael S. Tsirkin (3):
  vmx: allow ioeventfd for EPT violations
  svm: allow ioeventfd for NPT page faults
  kvm: add KVM_CAP_IOEVENTFD_PF capability

 include/uapi/linux/kvm.h  | 1 +
 arch/x86/kvm/svm.c| 5 +
 arch/x86/kvm/vmx.c| 5 +
 arch/x86/kvm/x86.c| 1 +
 Documentation/virtual/kvm/api.txt | 7 +++
 5 files changed, 19 insertions(+)

-- 
MST

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] KVM: arm64: Decode basic HYP fault information

2015-08-30 Thread Christoffer Dall
On Tue, Aug 11, 2015 at 10:34:07AM +0300, Pavel Fedin wrote:
 Print exception vector name, exception class and PC translated to EL1 virtual
 address. Significantly aids debugging HYP crashes without special means like
 JTAG.

my overall concern with this patch is that it adds complexity to an
already really bad situation, and potentially increases the likelihood
of not seeing any debug info at all.

do you encounter this kind of panic a lot?  I haven't experienced a
great need for more hyp debugging help lately...

 
 Signed-off-by: Pavel Fedin p.fe...@samsung.com
 ---
  arch/arm64/kvm/handle_exit.c | 30 +
  arch/arm64/kvm/hyp.S | 46 
 +---
  2 files changed, 48 insertions(+), 28 deletions(-)
 
 diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
 index 29b184a..4d70d64 100644
 --- a/arch/arm64/kvm/handle_exit.c
 +++ b/arch/arm64/kvm/handle_exit.c
 @@ -136,3 +136,33 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run 
 *run,
   return 0;
   }
  }
 +
 +static const char *const hyp_faults[] = {
 +	"EL2t Synchronous",
 +	"EL2t IRQ",
 +	"EL2t FIQ",
 +	"EL2t Error",
 +	"EL2h Synchronous",
 +	"EL2h IRQ",
 +	"EL2h FIQ",
 +	"EL2h Error",
 +	"EL1 Synchronous",
 +	"EL1 IRQ",
 +	"EL1 FIQ",
 +	"EL1 Error"
 +};
 +
 +void kvm_hyp_panic(unsigned long vector, unsigned int spsr, unsigned long pc,
 +		   unsigned int esr, unsigned long far, unsigned long hpfar,
 +		   unsigned long par, struct kvm_vcpu *vcpu)
 +{
 +	pr_emerg("Unhandled HYP exception %s on VCPU %p\n",
 +		 hyp_faults[vector], vcpu);
 +	pr_emerg("PC : %016lx SPSR : %08x ESR: %08x\n", pc, spsr, esr);
 +	pr_emerg("FAR: %016lx HPFAR: %016lx PAR: %016lx\n", far, hpfar, par);
 +
 +	pr_emerg("Exception class: %02x Translated PC: %016lx\n",
 +		 esr >> ESR_ELx_EC_SHIFT, pc - HYP_PAGE_OFFSET + PAGE_OFFSET);
 +
 +	panic("HYP panic");
 +}
 diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
 index c81eaaf..62785cd 100644
 --- a/arch/arm64/kvm/hyp.S
 +++ b/arch/arm64/kvm/hyp.S
 @@ -1060,13 +1060,11 @@ __kvm_hyp_panic:
   ldr x2, [x0, #VCPU_HOST_CONTEXT]
   kern_hyp_va x2
  
 + mov x0, lr
   bl __restore_sysregs
 + mov lr, x0
  
 -1:   adr x0, __hyp_panic_str
 - adr x1, 2f
 - ldp x2, x3, [x1]
 - sub x0, x0, x2
 - add x0, x0, x3
 +1:   mov x0, lr
   mrs x1, spsr_el2
   mrs x2, elr_el2
   mrs x3, esr_el2
 @@ -1078,20 +1076,11 @@ __kvm_hyp_panic:
   mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\
 PSR_MODE_EL1h)
   msr spsr_el2, lr
 - ldr lr, =panic
 + ldr lr, =kvm_hyp_panic
   msr elr_el2, lr
   eret
 -
 - .align  3
 -2:   .quad   HYP_PAGE_OFFSET
 - .quad   PAGE_OFFSET

why can you get rid of this?

  ENDPROC(__kvm_hyp_panic)
  
 -__hyp_panic_str:
 - .ascii  HYP panic:\nPS:%08x PC:%p ESR:%p\nFAR:%p HPFAR:%p 
 PAR:%p\nVCPU:%p\n\0
 -
 - .align  2
 -
  /*
   * u64 kvm_call_hyp(void *hypfn, ...);
   *
 @@ -1115,26 +1104,27 @@ ENTRY(kvm_call_hyp)
   ret
  ENDPROC(kvm_call_hyp)
  
 -.macro invalid_vectorlabel, target
 +.macro invalid_vectorlabel, N, target
   .align  2
  \label:
 + mov lr, #\N
   b \target
  ENDPROC(\label)
  .endm
  
   /* None of these should ever happen */
 - invalid_vector  el2t_sync_invalid, __kvm_hyp_panic
 - invalid_vector  el2t_irq_invalid, __kvm_hyp_panic
 - invalid_vector  el2t_fiq_invalid, __kvm_hyp_panic
 - invalid_vector  el2t_error_invalid, __kvm_hyp_panic
 - invalid_vector  el2h_sync_invalid, __kvm_hyp_panic
 - invalid_vector  el2h_irq_invalid, __kvm_hyp_panic
 - invalid_vector  el2h_fiq_invalid, __kvm_hyp_panic
 - invalid_vector  el2h_error_invalid, __kvm_hyp_panic
 - invalid_vector  el1_sync_invalid, __kvm_hyp_panic
 - invalid_vector  el1_irq_invalid, __kvm_hyp_panic
 - invalid_vector  el1_fiq_invalid, __kvm_hyp_panic
 - invalid_vector  el1_error_invalid, __kvm_hyp_panic
 + invalid_vector  el2t_sync_invalid, 0, __kvm_hyp_panic
 + invalid_vector  el2t_irq_invalid, 1, __kvm_hyp_panic
 + invalid_vector  el2t_fiq_invalid, 2, __kvm_hyp_panic
 + invalid_vector  el2t_error_invalid, 3, __kvm_hyp_panic
 + invalid_vector  el2h_sync_invalid, 4, __kvm_hyp_panic
 + invalid_vector  el2h_irq_invalid, 5, __kvm_hyp_panic
 + invalid_vector  el2h_fiq_invalid, 6, __kvm_hyp_panic
 + invalid_vector  el2h_error_invalid, 7, __kvm_hyp_panic
 + invalid_vector  el1_sync_invalid, 8, __kvm_hyp_panic
 + invalid_vector  el1_irq_invalid, 9, __kvm_hyp_panic
 + invalid_vector  el1_fiq_invalid, 10, __kvm_hyp_panic
 + invalid_vector  el1_error_invalid, 11, __kvm_hyp_panic
  
  el1_sync:// Guest trapped into EL2
   pushx0, 

Re: [PATCH 3/3] KVM: arm64: Implement accessors for vGIC CPU interface registers

2015-08-30 Thread Peter Maydell
On 30 August 2015 at 17:50, Christoffer Dall
christoffer.d...@linaro.org wrote:
 I had imagined we would encode the GICv3 register accesses through the
 device API and not through the system register API, since I'm not crazy
 about polluting the general system register handling logic with GIC
 registers solely for the purposes of migration.

There's an interesting design question lurking under this
about the extent to which you expose the h/w design split
between the CPU interface and the GIC proper as part of the
KVM APIs. I'm inclined to agree that it's better to for our
purposes treat both bits as just part of an irqchip device,
but I haven't given it a great deal of thought.

(Similarly in the QEMU emulated-GICv3 case you could also
split the CPU i/f more formally, or not. The kernel's choice
would have implications for which way QEMU ends up going,
I think.)

thanks
-- PMM
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/9] Rework architected timer and fix UEFI reset

2015-08-30 Thread Christoffer Dall
The architected timer integration with the vgic had some shortcomings in
that certain guests (one being UEFI) weren't fully supported.

In fixing this I also found that we are scheduling the hrtimer for the
virtual timer way too often, with a potential performance overhead.

This series tries to address these problems in proviging level-triggered
semantics for the arch timer and vgic intergration and seeks to clarify
the behavior when setting/clearing the active state on the physical
distributor.

Series based on kvmarm/next and also available at:
https://git.linaro.org/people/christoffer.dall/linux-kvm-arm.git timer-rework

Christoffer Dall (9):
  KVM: Add kvm_arch_vcpu_{un}blocking callbacks
  arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block
  arm/arm64: KVM: vgic: Factor out level irq processing on guest exit
  arm/arm64: Implement GICD_ICFGR as RO for PPIs
  arm/arm64: KVM: Use appropriate define in VGIC reset code
  arm/arm64: KVM: Add mapped interrupts documentation
  arm/arm64: KVM: vgic: Move active state handling to flush_hwstate
  arm/arm64: KVM: Rework the arch timer to use level-triggered semantics
  arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0

 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt |  59 ++
 arch/arm/kvm/arm.c |  21 ++-
 arch/mips/include/asm/kvm_host.h   |   2 +
 arch/powerpc/include/asm/kvm_host.h|   2 +
 arch/s390/include/asm/kvm_host.h   |   2 +
 arch/x86/include/asm/kvm_host.h|   3 +
 include/kvm/arm_arch_timer.h   |   4 +-
 include/kvm/arm_vgic.h |   3 -
 include/linux/kvm_host.h   |   2 +
 virt/kvm/arm/arch_timer.c  | 160 +++-
 virt/kvm/arm/vgic.c| 201 +++--
 virt/kvm/kvm_main.c|   3 +
 12 files changed, 308 insertions(+), 154 deletions(-)
 create mode 100644 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt

-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] KVM: arm64: Implement accessors for vGIC CPU interface registers

2015-08-30 Thread Christoffer Dall
On Fri, Aug 28, 2015 at 03:56:12PM +0300, Pavel Fedin wrote:
 This commit adds accessors for all registers, being part of saved vGIC
 context in the form of ICH_VMCR_EL2. This is necessary for enabling vGICv3
 live migration.
 
 Signed-off-by: Pavel Fedin p.fe...@samsung.com
 ---
  arch/arm64/kvm/sys_regs.c  | 176 
 +
  include/linux/irqchip/arm-gic-v3.h |  18 +++-
  2 files changed, 192 insertions(+), 2 deletions(-)
 
 diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
 index 8cc4a5e..7a4f982 100644
 --- a/arch/arm64/kvm/sys_regs.c
 +++ b/arch/arm64/kvm/sys_regs.c
 @@ -23,6 +23,7 @@
  #include linux/kvm_host.h
  #include linux/mm.h
  #include linux/uaccess.h
 +#include linux/irqchip/arm-gic-v3.h
  
  #include asm/cacheflush.h
  #include asm/cputype.h
 @@ -136,6 +137,162 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu,
   return true;
  }
  
 +static bool access_gic_ctlr(struct kvm_vcpu *vcpu,
 + const struct sys_reg_params *p,
 + const struct sys_reg_desc *r)
 +{
 + u64 val;
 + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3;
 +
 + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
 + return false;
 +
 + if (p-is_write) {
 + val = *vcpu_reg(vcpu, p-Rt);
 +
 + vgicv3-vgic_vmcr = ~(ICH_VMCR_CBPR|ICH_VMCR_EOIM);
 + vgicv3-vgic_vmcr |= (val  (ICH_VMCR_CBPR_SHIFT -
 + ICC_CTLR_EL1_CBPR_SHIFT)) 
 + ICH_VMCR_CBPR;
 + vgicv3-vgic_vmcr |= (val  (ICH_VMCR_EOIM_SHIFT -
 + ICC_CTLR_EL1_EOImode_SHIFT)) 
 + ICH_VMCR_EOIM;
 + } else {
 + asm volatile(mrs_s %0, __stringify(ICC_IAR1_EL1)
 +  : =r (val));
 + val = (ICC_CTLR_EL1_A3V | ICC_CTLR_EL1_SEIS |
 + ICC_CTLR_EL1_IDbits_MASK | ICC_CTLR_EL1_PRIbits_MASK);
 + val |= (vgicv3-vgic_vmcr  ICH_VMCR_CBPR) 
 + (ICH_VMCR_CBPR_SHIFT - ICC_CTLR_EL1_CBPR_SHIFT);
 + val |= (vgicv3-vgic_vmcr  ICH_VMCR_EOIM) 
 + (ICH_VMCR_EOIM_SHIFT - ICC_CTLR_EL1_EOImode_SHIFT);
 +
 + *vcpu_reg(vcpu, p-Rt) = val;
 + }
 +
 + return true;
 +}
 +
 +static bool access_gic_pmr(struct kvm_vcpu *vcpu,
 +const struct sys_reg_params *p,
 +const struct sys_reg_desc *r)
 +{
 + u64 val;
 + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3;
 +
 + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
 + return false;
 +
 + if (p-is_write) {
 + val = *vcpu_reg(vcpu, p-Rt);
 + vgicv3-vgic_vmcr = ~ICH_VMCR_PMR_MASK;
 + vgicv3-vgic_vmcr |= (val  ICH_VMCR_PMR_SHIFT) 
 + ICH_VMCR_PMR_MASK;
 + } else {
 + val = (vgicv3-vgic_vmcr  ICH_VMCR_PMR_MASK) 
 + ICH_VMCR_PMR_SHIFT;
 + *vcpu_reg(vcpu, p-Rt) = val;
 + }
 +
 + return true;
 +}
 +
 +static bool access_gic_bpr0(struct kvm_vcpu *vcpu,
 + const struct sys_reg_params *p,
 + const struct sys_reg_desc *r)
 +{
 + u64 val;
 + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3;
 +
 + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
 + return false;
 +
 + if (p-is_write) {
 + val = *vcpu_reg(vcpu, p-Rt);
 + vgicv3-vgic_vmcr = ~ICH_VMCR_BPR0_MASK;
 + vgicv3-vgic_vmcr |= (val  ICH_VMCR_BPR0_SHIFT) 
 + ICH_VMCR_BPR0_MASK;
 + } else {
 + val = (vgicv3-vgic_vmcr  ICH_VMCR_BPR0_MASK) 
 + ICH_VMCR_BPR0_SHIFT;
 + *vcpu_reg(vcpu, p-Rt) = val;
 + }
 +
 + return true;
 +}
 +
 +static bool access_gic_bpr1(struct kvm_vcpu *vcpu,
 + const struct sys_reg_params *p,
 + const struct sys_reg_desc *r)
 +{
 + u64 val;
 + struct vgic_v3_cpu_if *vgicv3 = vcpu-arch.vgic_cpu.vgic_v3;
 +
 + if (vcpu-kvm-arch.vgic.vgic_model != KVM_DEV_TYPE_ARM_VGIC_V3)
 + return false;
 +
 + if (p-is_write) {
 + val = *vcpu_reg(vcpu, p-Rt);
 + vgicv3-vgic_vmcr = ~ICH_VMCR_BPR1_MASK;
 + vgicv3-vgic_vmcr |= (val  ICH_VMCR_BPR1_SHIFT) 
 + ICH_VMCR_BPR1_MASK;
 + } else {
 + val = (vgicv3-vgic_vmcr  ICH_VMCR_BPR1_MASK) 
 + ICH_VMCR_BPR1_SHIFT;
 + *vcpu_reg(vcpu, p-Rt) = val;
 + }
 +
 + return true;
 +}
 +
 +static bool access_gic_grpen0(struct kvm_vcpu *vcpu,
 +   const struct sys_reg_params *p,
 +   const struct 

Re: [PATCH 1/3] KVM: arm64: Implement vGICv3 distributor and redistributor access from userspace

2015-08-30 Thread Christoffer Dall
On Fri, Aug 28, 2015 at 03:56:10PM +0300, Pavel Fedin wrote:
 The access is done similar to GICv2, using KVM_DEV_ARM_VGIC_GRP_DIST_REGS
 and KVM_DEV_ARM_VGIC_GRP_REDIST_REGS with KVM_SET_DEVICE_ATTR and
 KVM_GET_DEVICE_ATTR ioctls.
 
 Registers are always assumed to be of their native size, 4 or 8 bytes.
 
 Signed-off-by: Pavel Fedin p.fe...@samsung.com
 ---
  arch/arm64/include/uapi/asm/kvm.h |   1 +
  virt/kvm/arm/vgic-v3-emul.c   | 186 
 +++---
  2 files changed, 172 insertions(+), 15 deletions(-)
 
 diff --git a/arch/arm64/include/uapi/asm/kvm.h 
 b/arch/arm64/include/uapi/asm/kvm.h
 index 0cd7b59..2936651 100644
 --- a/arch/arm64/include/uapi/asm/kvm.h
 +++ b/arch/arm64/include/uapi/asm/kvm.h
 @@ -203,6 +203,7 @@ struct kvm_arch_memory_slot {
  #define KVM_DEV_ARM_VGIC_GRP_NR_IRQS 3
  #define KVM_DEV_ARM_VGIC_GRP_CTRL4
  #define   KVM_DEV_ARM_VGIC_CTRL_INIT 0
 +#define KVM_DEV_ARM_VGIC_GRP_REDIST_REGS 5
  
  /* KVM_IRQ_LINE irq field index values */
  #define KVM_ARM_IRQ_TYPE_SHIFT   24
 diff --git a/virt/kvm/arm/vgic-v3-emul.c b/virt/kvm/arm/vgic-v3-emul.c
 index e661e7f..b3847e1 100644
 --- a/virt/kvm/arm/vgic-v3-emul.c
 +++ b/virt/kvm/arm/vgic-v3-emul.c
 @@ -39,6 +39,7 @@
  #include linux/kvm.h
  #include linux/kvm_host.h
  #include linux/interrupt.h
 +#include linux/uaccess.h
  
  #include linux/irqchip/arm-gic-v3.h
  #include kvm/arm_vgic.h
 @@ -990,6 +991,107 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 
 reg)
   vgic_kick_vcpus(vcpu-kvm);
  }
  
 +static int vgic_v3_attr_regs_access(struct kvm_device *dev,
 +  struct kvm_device_attr *attr,
 +  void *reg, u32 len, bool is_write)

using a void pointer for the register with variable length here is
likely to cause endianness headaches.  Can we use a typed pointer here?

 +{
 + const struct vgic_io_range *r = NULL, *ranges;
 + phys_addr_t offset;
 + int ret, cpuid, c;
 + struct kvm_vcpu *vcpu, *tmp_vcpu;
 + struct vgic_dist *vgic;
 + struct kvm_exit_mmio mmio;
 + u64 data;
 +
 + offset = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK;
 + cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >>
 + KVM_DEV_ARM_VGIC_CPUID_SHIFT;
 +
 + mutex_lock(&dev->kvm->lock);
 +
 + ret = vgic_init(dev-kvm);
 + if (ret)
 + goto out;
 +
 + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) {
 + ret = -EINVAL;
 + goto out;
 + }
 +
 + vcpu = kvm_get_vcpu(dev->kvm, cpuid);
 + vgic = &dev->kvm->arch.vgic;
 +
 + mmio.len = len;
 + mmio.is_write = is_write;
 + mmio.data = &data;
 + if (is_write) {
 + if (len == 8)
 + data = cpu_to_le64(*((u64 *)reg));
 + else
 + mmio_data_write(&mmio, ~0, *((u32 *)reg));
 + }
 + switch (attr-group) {
 + case KVM_DEV_ARM_VGIC_GRP_DIST_REGS:
 + mmio.phys_addr = vgic->vgic_dist_base + offset;
 + ranges = vgic_v3_dist_ranges;
 + break;
 + case KVM_DEV_ARM_VGIC_GRP_REDIST_REGS:
 + mmio.phys_addr = vgic->vgic_redist_base + offset;
 + ranges = vgic_redist_ranges;
 + break;
 + default:
 + BUG();
 + }
 + r = vgic_find_range(ranges, 4, offset);
 +
 + if (unlikely(!r || !r->handle_mmio)) {
 + ret = -ENXIO;
 + goto out;
 + }
 +
 +
 + spin_lock(&vgic->lock);
 +
 + /*
 +  * Ensure that no other VCPU is running by checking the vcpu->cpu
 +  * field.  If no other VCPUs are running we can safely access the VGIC
 +  * state, because even if another VCPU is run after this point, that
 +  * VCPU will not touch the vgic state, because it will block on
 +  * getting the vgic->lock in kvm_vgic_sync_hwstate().
 +  */
 + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) {
 + if (unlikely(tmp_vcpu->cpu != -1)) {
 + ret = -EBUSY;
 + goto out_vgic_unlock;
 + }
 + }
 +
 + /*
 +  * Move all pending IRQs from the LRs on all VCPUs so the pending
 +  * state can be properly represented in the register state accessible
 +  * through this API.
 +  */
 + kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm)
 + vgic_unqueue_irqs(tmp_vcpu);
 +
 + offset -= r->base;
 + r->handle_mmio(vcpu, &mmio, offset);
 +
 + if (!is_write) {
 + if (len == 8)
 + *(u64 *)reg = le64_to_cpu(data);
 + else
 + *(u32 *)reg = mmio_data_read(&mmio, ~0);
 + }
 +
 + ret = 0;
 +out_vgic_unlock:
 + spin_unlock(&vgic->lock);
 +out:
 + mutex_unlock(&dev->kvm->lock);
 + return ret;

I feel like there's a lot of reused code with the v2 vgic here.  Can you
look at reusing some of the logic?

 +}
 +
  static int vgic_v3_create(struct kvm_device *dev, u32 type)
  {
   return 

Re: [PATCH 0/3] KVM: arm64: Implement API for vGICv3 live migration

2015-08-30 Thread Christoffer Dall
On Fri, Aug 28, 2015 at 03:56:09PM +0300, Pavel Fedin wrote:
 This patchset adds necessary userspace API in order to support vGICv3 live
 migration. This includes accessing GIC distributor and redistributor memory
 regions using device attribute ioctls, and system registers of
 CPU interface using register get/set ioctls.

This obviously lacks a clear description of the API in
Documentation/virtual/kvm/devices/arm-vgic.txt

 
 Pavel Fedin (3):
   KVM: arm64: Implement vGICv3 distributor and redistributor access from
 userspace
   KVM: arm64: Allow to use accessors in KVM_SET_ONE_REG and
 KVM_GET_ONE_REG
   KVM: arm64: Implement accessors for vGIC CPU interface registers
 
  arch/arm64/include/uapi/asm/kvm.h  |   1 +
  arch/arm64/kvm/sys_regs.c  | 223 
 -
  include/linux/irqchip/arm-gic-v3.h |  18 ++-
  virt/kvm/arm/vgic-v3-emul.c| 186 ---
  4 files changed, 405 insertions(+), 23 deletions(-)
 
 -- 
 2.4.4
 
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/9] arm/arm64: KVM: arch_timer: Only schedule soft timer on vcpu_block

2015-08-30 Thread Christoffer Dall
We currently schedule a soft timer every time we exit the guest if the
timer did not expire while running the guest.  This is really not
necessary, because the only work we do in the timer work function is to
kick the vcpu.

Kicking the vcpu does two things:
(1) If the vpcu thread is on a waitqueue, make it runnable and remove it
from the waitqueue.
(2) If the vcpu is running on a different physical CPU from the one
doing the kick, it sends a reschedule IPI.

The second case cannot happen, because the soft timer is only ever
scheduled when the vcpu is not running.  The first case is only relevant
when the vcpu thread is on a waitqueue, which is only the case when the
vcpu thread has called kvm_vcpu_block().

Therefore, we only need to make sure a timer is scheduled for
kvm_vcpu_block(), which we do by encapsulating all calls to
kvm_vcpu_block() with kvm_timer_{un}schedule calls.

Additionally, we only schedule a soft timer if the timer is enabled and
unmasked, since it is useless otherwise.

Note that theoretically userspace can use the SET_ONE_REG interface to
change registers that should cause the timer to fire, even if the vcpu
is blocked without a scheduled timer, but this case was not supported
before this patch and we leave it for future work for now.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/arm/include/asm/kvm_host.h   |  3 --
 arch/arm/kvm/arm.c| 10 +
 arch/arm64/include/asm/kvm_host.h |  3 --
 include/kvm/arm_arch_timer.h  |  2 +
 virt/kvm/arm/arch_timer.c | 89 +--
 5 files changed, 70 insertions(+), 37 deletions(-)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index 86fcf6e..dcba0fa 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -236,7 +236,4 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
 
-static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index ce404a5..bdf8871 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -271,6 +271,16 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
return kvm_timer_should_fire(vcpu);
 }
 
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
+{
+   kvm_timer_schedule(vcpu);
+}
+
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
+{
+   kvm_timer_unschedule(vcpu);
+}
+
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
/* Force users to call KVM_ARM_VCPU_INIT */
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index dd143f5..415938d 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -257,7 +257,4 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
 
-static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
-static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
-
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index e1e4d7c..ef14cc1 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -71,5 +71,7 @@ u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid);
 int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value);
 
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu);
+void kvm_timer_schedule(struct kvm_vcpu *vcpu);
+void kvm_timer_unschedule(struct kvm_vcpu *vcpu);
 
 #endif
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 76e38d2..018f3d6 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -111,14 +111,21 @@ static enum hrtimer_restart kvm_timer_expire(struct 
hrtimer *hrt)
return HRTIMER_NORESTART;
 }
 
+static bool kvm_timer_irq_enabled(struct kvm_vcpu *vcpu)
+{
+   struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
+
+   return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) &&
+   (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) &&
+   !kvm_vgic_get_phys_irq_active(timer->map);
+}
+
 bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 {
 struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
cycle_t cval, now;
 
-   if ((timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) ||
-   !(timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) ||
-   kvm_vgic_get_phys_irq_active(timer->map))
+   if (!kvm_timer_irq_enabled(vcpu))
return false;
 
 cval = timer->cntv_cval;
@@ -127,24 +134,59 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu)
 return cval <= now;
 }
 
-/**
- * kvm_timer_flush_hwstate - prepare to move the virt timer to the 

[PATCH 8/9] arm/arm64: KVM: Rework the arch timer to use level-triggered semantics

2015-08-30 Thread Christoffer Dall
The arch timer currently uses edge-triggered semantics in the sense that
the line is never sampled by the vgic and lowering the line from the
timer to the vgic doesn't have any affect on the pending state of
virtual interrupts in the vgic.  This means that we do not support a
guest with the otherwise valid behavior of (1) disable interrupts (2)
enable the timer (3) disable the timer (4) enable interrupts.  Such a
guest would validly not expect to see any interrupts on real hardware,
but will see interrupts on KVM.

This patches fixes this shortcoming through the following series of
changes.

First, we change the flow of the timer/vgic sync/flush operations.  Now
the timer is always flushed/synced before the vgic, because the vgic
samples the state of the timer output.  This has the implication that we
move the timer operations in to non-preempible sections, but that is
fine after the previous commit getting rid of hrtimer schedules on every
entry/exit.

Second, we change the internal behavior of the timer, letting the timer
keep track of its previous output state, and only lower/raise the line
to the vgic when the state changes.  Note that in theory this could have
been accomplished more simply by signalling the vgic every time the
state *potentially* changed, but we don't want to be hitting the vgic
more often than necessary.

Third, we get rid of the use of the map-active field in the vgic and
instead simply set the interrupt as active on the physical distributor
whenever we signal a mapped interrupt to the guest, and we reset the
active state when we sync back the HW state from the vgic.

Fourth, and finally, we now initialize the timer PPIs (and all the other
unused PPIs for now), to be level-triggered, and modify the sync code to
sample the line state on HW sync and re-inject a new interrupt if it is
still pending at that time.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/arm/kvm/arm.c   | 11 +--
 include/kvm/arm_arch_timer.h |  2 +-
 include/kvm/arm_vgic.h   |  3 --
 virt/kvm/arm/arch_timer.c| 68 +++-
 virt/kvm/arm/vgic.c  | 67 +++
 5 files changed, 81 insertions(+), 70 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index bdf8871..102a4aa 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
 if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
+   kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
preempt_enable();
-   kvm_timer_sync_hwstate(vcpu);
continue;
}
 
@@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
kvm_guest_exit();
trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
 
+   /*
+* We must sync the timer state before the vgic state so that
+* the vgic can properly sample the updated state of the
+* interrupt line.
+*/
+   kvm_timer_sync_hwstate(vcpu);
+
kvm_vgic_sync_hwstate(vcpu);
 
preempt_enable();
 
-   kvm_timer_sync_hwstate(vcpu);
-
ret = handle_exit(vcpu, run, ret);
}
 
diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h
index ef14cc1..1800227 100644
--- a/include/kvm/arm_arch_timer.h
+++ b/include/kvm/arm_arch_timer.h
@@ -51,7 +51,7 @@ struct arch_timer_cpu {
boolarmed;
 
/* Timer IRQ */
-   const struct kvm_irq_level  *irq;
+   struct kvm_irq_level	irq;
 
/* VGIC mapping */
struct irq_phys_map *map;
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index d901f1a..99011a0 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -163,7 +163,6 @@ struct irq_phys_map {
u32 virt_irq;
u32 phys_irq;
u32 irq;
-   bool	active;
 };
 
 struct irq_phys_map_entry {
@@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu);
 struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu,
   int virt_irq, int irq);
 int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map);
-bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map);
-void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active);
 
 #define irqchip_in_kernel(k)   (!!((k)->arch.vgic.in_kernel))
 #define vgic_initialized(k)	(!!((k)->arch.vgic.nr_cpus))
diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 

[PATCH 5/9] arm/arm64: KVM: Use appropriate define in VGIC reset code

2015-08-30 Thread Christoffer Dall
We currently initialize the SGIs to be enabled in the VGIC code, but we
use the VGIC_NR_PPIS define for this purpose, instead of the the more
natural VGIC_NR_SGIS.  Change this slightly confusing use of the
defines.

Note: This should have no functional change, as both names are defined
to the number 16.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 0ba92d3..8299c24 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -2099,7 +2099,7 @@ int vgic_init(struct kvm *kvm)
}
 
for (i = 0; i  dist-nr_irqs; i++) {
-   if (i < VGIC_NR_PPIS)
+   if (i < VGIC_NR_SGIS)
vgic_bitmap_set_irq_val(dist-irq_enabled,
vcpu-vcpu_id, i, 1);
 if (i < VGIC_NR_PRIVATE_IRQS)
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 9/9] arm/arm64: KVM: arch timer: Reset CNTV_CTL to 0

2015-08-30 Thread Christoffer Dall
Provide a better quality of implementation and be architecture compliant
on ARMv7 for the architected timer by resetting the CNTV_CTL to 0 on
reset of the timer, and call kvm_timer_update_state(vcpu) at the same
time, ensuring the timer output is not asserted after, for example, a
PSCI system reset.

This change alone fixes the UEFI reset issue reported by Laszlo back in
February.

Cc: Laszlo Ersek ler...@redhat.com
Cc: Ard Biesheuvel ard.biesheu...@linaro.org
Cc: Drew Jones drjo...@redhat.com
Cc: Wei Huang w...@redhat.com
Cc: Peter Maydell peter.mayd...@linaro.org
Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/arch_timer.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 747302f..8a0fdfc 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -255,6 +255,15 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu,
 timer->irq.irq = irq->irq;
 
/*
+* The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8
+* and to 0 for ARMv7.  We provide an implementation that always
+* resets the timer to be disabled and unmasked and is compliant with
+* the ARMv7 architecture.
+*/
+   timer->cntv_ctl = 0;
+   kvm_timer_update_state(vcpu);
+
+   /*
 * Tell the VGIC that the virtual interrupt is tied to a
 * physical interrupt. We do that once per VCPU.
 */
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 7/9] arm/arm64: KVM: vgic: Move active state handling to flush_hwstate

2015-08-30 Thread Christoffer Dall
We currently set the physical active state only when we *inject* a new
pending virtual interrupt, but this is actually not correct, because we
could have been preempted and run something else on the system that
resets the active state to clear.  This causes us to run the VM with the
timer set to fire, but without setting the physical active state.

The solution is to always check the LR configurations, and we if have a
mapped interrupt in th LR in either the pending or active state
(virtual), then set the physical active state.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic.c | 42 ++
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 8299c24..9ed8d53 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1144,26 +1144,11 @@ static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, 
int irq,
struct irq_phys_map *map;
map = vgic_irq_map_search(vcpu, irq);
 
-   /*
-* If we have a mapping, and the virtual interrupt is
-* being injected, then we must set the state to
-* active in the physical world. Otherwise the
-* physical interrupt will fire and the guest will
-* exit before processing the virtual interrupt.
-*/
if (map) {
-   int ret;
-
-   BUG_ON(!map->active);
 vlr.hwirq = map->phys_irq;
 vlr.state |= LR_HW;
 vlr.state &= ~LR_EOI_INT;
 
-   ret = irq_set_irqchip_state(map->irq,
-   IRQCHIP_STATE_ACTIVE,
-   true);
-   WARN_ON(ret);
-
/*
 * Make sure we're not going to sample this
 * again, as a HW-backed interrupt cannot be
@@ -1255,7 +1240,7 @@ static void __kvm_vgic_flush_hwstate(struct kvm_vcpu 
*vcpu)
 struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
 struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
unsigned long *pa_percpu, *pa_shared;
-   int i, vcpu_id;
+   int i, vcpu_id, lr, ret;
int overflow = 0;
int nr_shared = vgic_nr_shared_irqs(dist);
 
@@ -1310,6 +1295,31 @@ epilog:
 */
 clear_bit(vcpu_id, dist->irq_pending_on_cpu);
}
+
+   for (lr = 0; lr < vgic->nr_lr; lr++) {
+   struct vgic_lr vlr;
+
+   if (!test_bit(lr, vgic_cpu->lr_used))
+   continue;
+
+   vlr = vgic_get_lr(vcpu, lr);
+
+   /*
+* If we have a mapping, and the virtual interrupt is
+* presented to the guest (as pending or active), then we must
+* set the state to active in the physical world. See
+* Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt.
+*/
+   if (vlr.state & LR_HW) {
+   struct irq_phys_map *map;
+   map = vgic_irq_map_search(vcpu, vlr.irq);
+
+   ret = irq_set_irqchip_state(map->irq,
+   IRQCHIP_STATE_ACTIVE,
+   true);
+   WARN_ON(ret);
+   }
+   }
 }
 
 static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/9] arm/arm64: Implement GICD_ICFGR as RO for PPIs

2015-08-30 Thread Christoffer Dall
The GICD_ICFGR allows the bits for the SGIs and PPIs to be read only.
We currently simulate this behavior by writing a hardcoded value to the
register for the SGIs and PPIs on every write of these bits to the
register (ignoring what the guest actually wrote), and by writing the
same value as the reset value to the register.

This is a bit counter-intuitive, as the register is RO for these bits,
and we can just implement it that way, allowing us to control the value
of the bits purely in the reset code.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index c5750be..0ba92d3 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -655,7 +655,7 @@ bool vgic_handle_cfg_reg(u32 *reg, struct kvm_exit_mmio 
*mmio,
ACCESS_READ_VALUE | ACCESS_WRITE_VALUE);
if (mmio-is_write) {
 if (offset < 8) {
-   *reg = ~0U; /* Force PPIs/SGIs to 1 */
+   /* Ignore writes to read-only SGI and PPI bits */
return false;
}
 
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/9] KVM: Add kvm_arch_vcpu_{un}blocking callbacks

2015-08-30 Thread Christoffer Dall
Some times it is useful for architecture implementations of KVM to know
when the VCPU thread is about to block or when it comes back from
blocking (arm/arm64 needs to know this to properly implement timers, for
example).

Therefore provide a generic architecture callback function in line with
what we do elsewhere for KVM generic-arch interactions.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/arm/include/asm/kvm_host.h | 3 +++
 arch/arm64/include/asm/kvm_host.h   | 3 +++
 arch/mips/include/asm/kvm_host.h| 2 ++
 arch/powerpc/include/asm/kvm_host.h | 2 ++
 arch/s390/include/asm/kvm_host.h| 2 ++
 arch/x86/include/asm/kvm_host.h | 3 +++
 include/linux/kvm_host.h| 2 ++
 virt/kvm/kvm_main.c | 3 +++
 8 files changed, 20 insertions(+)

diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
index dcba0fa..86fcf6e 100644
--- a/arch/arm/include/asm/kvm_host.h
+++ b/arch/arm/include/asm/kvm_host.h
@@ -236,4 +236,7 @@ static inline void kvm_arm_setup_debug(struct kvm_vcpu 
*vcpu) {}
 static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {}
 
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM_KVM_HOST_H__ */
diff --git a/arch/arm64/include/asm/kvm_host.h 
b/arch/arm64/include/asm/kvm_host.h
index 415938d..dd143f5 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -257,4 +257,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
 void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu);
 
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h
index e8c8d9d..58f0f4d 100644
--- a/arch/mips/include/asm/kvm_host.h
+++ b/arch/mips/include/asm/kvm_host.h
@@ -845,5 +845,7 @@ static inline void kvm_arch_flush_shadow_memslot(struct kvm 
*kvm,
struct kvm_memory_slot *slot) {}
 static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __MIPS_KVM_HOST_H__ */
diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index d91f65b..179f9a7 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -702,5 +702,7 @@ static inline void kvm_arch_memslots_updated(struct kvm 
*kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
 static inline void kvm_arch_exit(void) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif /* __POWERPC_KVM_HOST_H__ */
diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h
index 3024acb..04a97df 100644
--- a/arch/s390/include/asm/kvm_host.h
+++ b/arch/s390/include/asm/kvm_host.h
@@ -640,5 +640,7 @@ static inline void kvm_arch_memslots_updated(struct kvm 
*kvm, struct kvm_memslot
 static inline void kvm_arch_flush_shadow_all(struct kvm *kvm) {}
 static inline void kvm_arch_flush_shadow_memslot(struct kvm *kvm,
struct kvm_memory_slot *slot) {}
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
 
 #endif
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 2a7f5d7..26c4086 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -1202,4 +1202,7 @@ int __x86_set_memory_region(struct kvm *kvm,
 int x86_set_memory_region(struct kvm *kvm,
  const struct kvm_userspace_memory_region *mem);
 
+static inline void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu) {}
+static inline void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu) {}
+
 #endif /* _ASM_X86_KVM_HOST_H */
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 9564fd7..87d7be6 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -619,6 +619,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, 
const void *data,
 void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
 
 void kvm_vcpu_block(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu);
+void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 int kvm_vcpu_yield_to(struct kvm_vcpu *target);
 void 

[PATCH 6/9] arm/arm64: KVM: Add mapped interrupts documentation

2015-08-30 Thread Christoffer Dall
Mapped interrupts on arm/arm64 is a tricky concept and the way we deal
with them is not apparently easy to understand by reading various specs.

Therefore, add a proper documentation file explaining the flow and
rationale of the behavior of the vgic.

Some of this text was contributed by Marc Zyngier.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt | 59 ++
 1 file changed, 59 insertions(+)
 create mode 100644 Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt

diff --git a/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt 
b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
new file mode 100644
index 000..49e1357
--- /dev/null
+++ b/Documentation/virtual/kvm/arm/vgic-mapped-irqs.txt
@@ -0,0 +1,59 @@
+KVM/ARM VGIC Mapped Interrupts
+==
+
+Setting the Physical Active State for Edge vs. Level Triggered IRQs
+---
+
+Mapped non-shared interrupts injected to a guest should always mark the
+interrupt as active on the physical distributor.
+
+The reasoning for level-triggered interrupts:
+For level-triggered interrupts, we have to mark the interrupt as active
+on the physical distributor, because otherwise, as the line remains
+asserted, the guest will never execute because the host will keep taking
+interrupts.  As soon as the guest deactivates the interrupt, the
+physical line is sampled by the hardware again and the host takes a new
+interrupt if the physical line is still asserted.
+
+The reasoning for edge-triggered interrupts:
+For edge-triggered interrupts, if we set the HW bit in the LR we also
+have to mark the interrupt as active on the physical distributor.  If we
+don't set the physical active bit and the interrupt hits again before
+the guest has deactivated the interrupt, the interrupt goes to the host,
+which cannot set the state to ACTIVE+PENDING in the LR, because that is
+not supported when setting the HW bit in the LR.
+
+An alternative could be to not use HW bit at all, and inject
+edge-triggered interrupts from a physical assigned device as pure
+virtual interrupts, but that would potentially slow down handling of the
+interrupt in the guest, because a physical interrupt occurring in the
+middle of the guest ISR would preempt the guest for the host to handle
+the interrupt.
+
+
+Life Cycle for Forwarded Physical Interrupts
+
+
+By forwarded physical interrupts we mean interrupts presented to a guest
+representing a real HW event originally signaled to the host as a
+physical interrupt and injecting this as a virtual interrupt with the HW
+bit set in the LR.
+
+The state of such an interrupt is managed in the following way:
+
+  - LR.Pending must be set when the interrupt is first injected, because this
+is the only way the GICV interface is going to present it to the guest.
+  - LR.Pending will stay set as long as the guest has not acked the interrupt.
+  - LR.Pending transitions to LR.Active on read of IAR, as expected.
+  - On EOI, the *physical distributor* active bit gets cleared, but the
+LR.Active is left untouched - it looks like the GIC can only clear a
+single bit (either the virtual active, or the physical one).
+  - This means we cannot trust LR.Active to find out about the state of the
+interrupt, and we definitely need to look at the distributor version.
+
+Consequently, when we context switch the state of a VCPU with forwarded
+physical interrupts, we must context switch set pending *or* active bits in the
+LR for that VCPU until the guest has deactivated the physical interrupt, and
+then clear the corresponding bits in the LR.  If we ever set an LR to pending 
or
+mapped when switching in a VCPU for a forwarded physical interrupt, we must 
also
+set the active state on the *physical distributor*.
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/9] arm/arm64: KVM: vgic: Factor out level irq processing on guest exit

2015-08-30 Thread Christoffer Dall
Currently vgic_process_maintenance() processes dealing with a completed
level-triggered interrupt directly, but we are soon going to reuse this
logic for level-triggered mapped interrupts with the HW bit set, so
move this logic into a separate static function.

Probably the most scary part of this commit is convincing yourself that
the current flow is safe compared to the old one.  In the following I
try to list the changes and why they are harmless:

  Move vgic_irq_clear_queued after kvm_notify_acked_irq:
Harmless because the effect of clearing the queued flag wrt.
kvm_set_irq is only that vgic_update_irq_pending does not set the
pending bit on the emulated CPU interface or in the pending_on_cpu
bitmask, but we set this in __kvm_vgic_sync_hwstate later on if the
level is stil high.

  Move vgic_set_lr before kvm_notify_acked_irq:
Also, harmless because the LR are cpu-local operations and
kvm_notify_acked only affects the dist

  Move vgic_dist_irq_clear_soft_pend after kvm_notify_acked_irq:
Also harmless because it's just a bit which is cleared and altering
the line state does not affect this bit.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/vgic.c | 88 ++---
 1 file changed, 50 insertions(+), 38 deletions(-)

diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index 9eb489a..c5750be 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -1312,12 +1312,56 @@ epilog:
}
 }
 
+static int process_level_irq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr)
+{
+   int level_pending = 0;
+
+   vlr.state = 0;
+   vlr.hwirq = 0;
+   vgic_set_lr(vcpu, lr, vlr);
+
+   /*
+* If the IRQ was EOIed (called from vgic_process_maintenance) or it
+* went from active to non-active (called from vgic_sync_hwirq) it was
+* also ACKed and we therefore assume we can clear the soft pending
+* state (should it had been set) for this interrupt.
+*
+* Note: if the IRQ soft pending state was set after the IRQ was
+* acked, it actually shouldn't be cleared, but we have no way of
+* knowing that unless we start trapping ACKs when the soft-pending
+* state is set.
+*/
+   vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
+
+   /*
+* Tell the gic to start sampling the line of this interrupt again.
+*/
+   vgic_irq_clear_queued(vcpu, vlr.irq);
+
+   /* Any additional pending interrupt? */
+   if (vgic_dist_irq_get_level(vcpu, vlr.irq)) {
+   vgic_cpu_irq_set(vcpu, vlr.irq);
+   level_pending = 1;
+   } else {
+   vgic_dist_irq_clear_pending(vcpu, vlr.irq);
+   vgic_cpu_irq_clear(vcpu, vlr.irq);
+   }
+
+   /*
+* Despite being EOIed, the LR may not have
+* been marked as empty.
+*/
+   vgic_sync_lr_elrsr(vcpu, lr, vlr);
+
+   return level_pending;
+}
+
 static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
 {
u32 status = vgic_get_interrupt_status(vcpu);
struct vgic_dist *dist = vcpu-kvm-arch.vgic;
-   bool level_pending = false;
struct kvm *kvm = vcpu-kvm;
+   int level_pending = 0;
 
kvm_debug(STATUS = %08x\n, status);
 
@@ -1332,54 +1376,22 @@ static bool vgic_process_maintenance(struct kvm_vcpu 
*vcpu)
 
for_each_set_bit(lr, eisr_ptr, vgic-nr_lr) {
struct vgic_lr vlr = vgic_get_lr(vcpu, lr);
-   WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
 
-   spin_lock(dist-lock);
-   vgic_irq_clear_queued(vcpu, vlr.irq);
+   WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq));
WARN_ON(vlr.state  LR_STATE_MASK);
-   vlr.state = 0;
-   vgic_set_lr(vcpu, lr, vlr);
 
-   /*
-* If the IRQ was EOIed it was also ACKed and we we
-* therefore assume we can clear the soft pending
-* state (should it had been set) for this interrupt.
-*
-* Note: if the IRQ soft pending state was set after
-* the IRQ was acked, it actually shouldn't be
-* cleared, but we have no way of knowing that unless
-* we start trapping ACKs when the soft-pending state
-* is set.
-*/
-   vgic_dist_irq_clear_soft_pend(vcpu, vlr.irq);
 
/*
 * kvm_notify_acked_irq calls kvm_set_irq()
-* to reset the IRQ level. Need to release the
-* lock for kvm_set_irq to grab it.
+* to reset the IRQ level, which grabs the dist-lock
+   

[PATCH 1/2] arm/arm64: KVM: Add tracepoints for vgic and timer

2015-08-30 Thread Christoffer Dall
The VGIC and timer code for KVM arm/arm64 doesn't have any tracepoints
or tracepoint infrastructure defined.  Rewriting some of the timer code
handling showed me how much we need this, so let's add these simple
trace points once and for all and we can easily expand with additional
trace points in these files as we go along.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 virt/kvm/arm/arch_timer.c |  4 ++
 virt/kvm/arm/trace.h  | 97 +++
 virt/kvm/arm/vgic.c   |  9 +
 3 files changed, 110 insertions(+)
 create mode 100644 virt/kvm/arm/trace.h

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index 8a0fdfc..f63b208 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -28,6 +28,8 @@
 #include kvm/arm_vgic.h
 #include kvm/arm_arch_timer.h
 
+#include trace.h
+
 static struct timecounter *timecounter;
 static struct workqueue_struct *wqueue;
 static unsigned int host_vtimer_irq;
@@ -128,6 +130,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu)
 
BUG_ON(!vgic_initialized(vcpu-kvm));
 
+   trace_kvm_timer_update_irq(vcpu-vcpu_id, timer-map-virt_irq,
+  timer-irq-level);
ret = kvm_vgic_inject_mapped_irq(vcpu-kvm, vcpu-vcpu_id,
 timer-map,
 timer-irq.level);
diff --git a/virt/kvm/arm/trace.h b/virt/kvm/arm/trace.h
new file mode 100644
index 000..48c3c90
--- /dev/null
+++ b/virt/kvm/arm/trace.h
@@ -0,0 +1,97 @@
+#if !defined(_TRACE_KVM_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_KVM_H
+
+#include linux/tracepoint.h
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kvm
+
+/*
+ * Tracepoints for vgic
+ */
+TRACE_EVENT(kvm_vgic_set_irqchip_active,
+   TP_PROTO(unsigned long vcpu_id, __u32 irq),
+   TP_ARGS(vcpu_id, irq),
+
+   TP_STRUCT__entry(
+   __field(unsigned long,  vcpu_id )
+   __field(__u32,  irq )
+   ),
+
+   TP_fast_assign(
+   __entry-vcpu_id= vcpu_id;
+   __entry-irq= irq;
+   ),
+
+   TP_printk(VCPU: %ld, IRQ %d, __entry-vcpu_id, __entry-irq)
+);
+
+TRACE_EVENT(kvm_vgic_clear_irqchip_active,
+   TP_PROTO(unsigned long vcpu_id, __u32 irq),
+   TP_ARGS(vcpu_id, irq),
+
+   TP_STRUCT__entry(
+   __field(unsigned long,  vcpu_id )
+   __field(__u32,  irq )
+   ),
+
+   TP_fast_assign(
+   __entry-vcpu_id= vcpu_id;
+   __entry-irq= irq;
+   ),
+
+   TP_printk(VCPU: %ld, IRQ %d, __entry-vcpu_id, __entry-irq)
+);
+
+TRACE_EVENT(vgic_update_irq_pending,
+   TP_PROTO(unsigned long vcpu_id, __u32 irq, bool level),
+   TP_ARGS(vcpu_id, irq, level),
+
+   TP_STRUCT__entry(
+   __field(unsigned long,  vcpu_id )
+   __field(__u32,  irq )
+   __field(bool,   level   )
+   ),
+
+   TP_fast_assign(
+   __entry-vcpu_id= vcpu_id;
+   __entry-irq= irq;
+   __entry-level  = level;
+   ),
+
+   TP_printk(VCPU: %ld, IRQ %d, level: %d,
+ __entry-vcpu_id, __entry-irq, __entry-level)
+);
+
+/*
+ * Tracepoints for arch_timer
+ */
+TRACE_EVENT(kvm_timer_inject_irq,
+   TP_PROTO(unsigned long vcpu_id, __u32 irq, int level),
+   TP_ARGS(vcpu_id, irq, level),
+
+   TP_STRUCT__entry(
+   __field(unsigned long,  vcpu_id )
+   __field(__u32,  irq )
+   __field(int,level   )
+   ),
+
+   TP_fast_assign(
+   __entry-vcpu_id= vcpu_id;
+   __entry-irq= irq;
+   __entry-level  = level;
+   ),
+
+   TP_printk(VCPU: %ld, IRQ %d, level %d,
+ __entry-vcpu_id, __entry-irq, __entry-level)
+);
+
+#endif /* _TRACE_KVM_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH ../../../virt/kvm/arm
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE trace
+
+/* This part must be outside protection */
+#include trace/define_trace.h
diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
index f4ea950..45c95a0 100644
--- a/virt/kvm/arm/vgic.c
+++ b/virt/kvm/arm/vgic.c
@@ -34,6 +34,9 @@
 #include asm/kvm.h
 #include kvm/iodev.h
 
+#define CREATE_TRACE_POINTS
+#include trace.h
+
 /*
  * How the whole thing works (courtesy of Christoffer Dall):
  *
@@ -1314,6 +1317,8 @@ epilog:
struct irq_phys_map *map;
map = vgic_irq_map_search(vcpu, vlr.irq);
 
+   trace_kvm_vgic_set_irqchip_active(vcpu-vcpu_id,
+ vlr.irq);
ret = 

[PATCH 0/2] Improve and add tracepoints for KVM on arm/arm64

2015-08-30 Thread Christoffer Dall
The timer and vgic code didn't have tracepoints for quite a while and
we've been adding those ad-hoc when doing development a lot of times.
Add some simple tracepoints for those parts of KVM to get the
infrastructure in place.

Also improve the kvm_exit tracepoint on arm/arm64 to print something
meaningful and be much less misleading compared to what we have now.

This series depends on the Rework architected timer and fix UEFI reset
series sent earlier.  It is also available here:

https://git.linaro.org/people/christoffer.dall/linux-kvm-arm.git tracing-fixup

I borrowed some of this code from Alex Bennée, thanks!

Christoffer Dall (2):
  arm/arm64: KVM: Add tracepoints for vgic and timer
  arm/arm64: KVM: Improve kvm_exit tracepoint

 arch/arm/include/asm/kvm_arm.h   | 20 +
 arch/arm/kvm/arm.c   |  2 +-
 arch/arm/kvm/trace.h | 10 +++--
 arch/arm64/include/asm/kvm_arm.h | 16 +++
 virt/kvm/arm/arch_timer.c|  4 ++
 virt/kvm/arm/trace.h | 97 
 virt/kvm/arm/vgic.c  |  9 
 7 files changed, 154 insertions(+), 4 deletions(-)
 create mode 100644 virt/kvm/arm/trace.h

-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] arm/arm64: KVM: Improve kvm_exit tracepoint

2015-08-30 Thread Christoffer Dall
The ARM architecture only saves the exit class to the HSR (ESR_EL2 for
arm64) on synchronous exceptions, not on asynchronous exceptions like an
IRQ.  However, we only report the exception class on kvm_exit, which is
confusing because an IRQ looks like it exited at some PC with the same
reason as the previous exit.  Add a lookup table for the exception index
and prepend the kvm_exit tracepoint text with the exception type to
clarify this situation.

Also resolve the exception class (EC) to a human-friendly text version
so the trace output becomes immediately usable for debugging this code.

Signed-off-by: Christoffer Dall christoffer.d...@linaro.org
---
 arch/arm/include/asm/kvm_arm.h   | 20 
 arch/arm/kvm/arm.c   |  2 +-
 arch/arm/kvm/trace.h | 10 +++---
 arch/arm64/include/asm/kvm_arm.h | 16 
 4 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h
index d995821..dc641dd 100644
--- a/arch/arm/include/asm/kvm_arm.h
+++ b/arch/arm/include/asm/kvm_arm.h
@@ -218,4 +218,24 @@
 #define HSR_DABT_CM(1U  8)
 #define HSR_DABT_EA(1U  9)
 
+#define kvm_arm_exception_type \
+   {0, RESET },  \
+   {1, UNDEFINED },  \
+   {2, SOFTWARE },   \
+   {3, PREF_ABORT }, \
+   {4, DATA_ABORT }, \
+   {5, IRQ },\
+   {6, FIQ },\
+   {7, HVC }
+
+#define HSRECN(x) { HSR_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+   HSRECN(UNKNOWN), HSRECN(WFI), HSRECN(CP15_32), HSRECN(CP15_64), \
+   HSRECN(CP14_MR), HSRECN(CP14_LS), HSRECN(CP_0_13), HSRECN(CP10_ID), \
+   HSRECN(JAZELLE), HSRECN(BXJ), HSRECN(CP14_64), HSRECN(SVC_HYP), \
+   HSRECN(HVC), HSRECN(SMC), HSRECN(IABT), HSRECN(IABT_HYP), \
+   HSRECN(DABT), HSRECN(DABT_HYP)
+
+
 #endif /* __ARM_KVM_ARM_H__ */
diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index 102a4aa..ffec2f2 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -606,7 +606,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 * guest time.
 */
kvm_guest_exit();
-   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+   trace_kvm_exit(ret, kvm_vcpu_trap_get_class(vcpu), 
*vcpu_pc(vcpu));
 
/*
 * We must sync the timer state before the vgic state so that
diff --git a/arch/arm/kvm/trace.h b/arch/arm/kvm/trace.h
index 0ec3539..c25a885 100644
--- a/arch/arm/kvm/trace.h
+++ b/arch/arm/kvm/trace.h
@@ -25,21 +25,25 @@ TRACE_EVENT(kvm_entry,
 );
 
 TRACE_EVENT(kvm_exit,
-   TP_PROTO(unsigned int exit_reason, unsigned long vcpu_pc),
-   TP_ARGS(exit_reason, vcpu_pc),
+   TP_PROTO(int idx, unsigned int exit_reason, unsigned long vcpu_pc),
+   TP_ARGS(idx, exit_reason, vcpu_pc),
 
TP_STRUCT__entry(
+   __field(int,idx )
__field(unsigned int,   exit_reason )
__field(unsigned long,  vcpu_pc )
),
 
TP_fast_assign(
+   __entry-idx= idx;
__entry-exit_reason= exit_reason;
__entry-vcpu_pc= vcpu_pc;
),
 
-   TP_printk(HSR_EC: 0x%04x, PC: 0x%08lx,
+   TP_printk(%s: HSR_EC: 0x%04x (%s), PC: 0x%08lx,
+ __print_symbolic(__entry-idx, kvm_arm_exception_type),
  __entry-exit_reason,
+ __print_symbolic(__entry-exit_reason, 
kvm_arm_exception_class),
  __entry-vcpu_pc)
 );
 
diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 7605e09..ffb86bf 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -197,4 +197,20 @@
 /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */
 #define HPFAR_MASK (~UL(0xf))
 
+#define kvm_arm_exception_type \
+   {0, IRQ },\
+   {1, TRAP }
+
+#define ECN(x) { ESR_ELx_EC_##x, #x }
+
+#define kvm_arm_exception_class \
+   ECN(UNKNOWN), ECN(WFx), ECN(CP15_32), ECN(CP15_64), ECN(CP14_MR), \
+   ECN(CP14_LS), ECN(FP_ASIMD), ECN(CP10_ID), ECN(CP14_64), ECN(SVC64), \
+   ECN(HVC64), ECN(SMC64), ECN(SYS64), ECN(IMP_DEF), ECN(IABT_LOW), \
+   ECN(IABT_CUR), ECN(PC_ALIGN), ECN(DABT_LOW), ECN(DABT_CUR), \
+   ECN(SP_ALIGN), ECN(FP_EXC32), ECN(FP_EXC64), ECN(SERROR), \
+   ECN(BREAKPT_LOW), ECN(BREAKPT_CUR), ECN(SOFTSTP_LOW), \
+   ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \
+   ECN(BKPT32), ECN(VECTOR32), ECN(BRK64)
+
 #endif /* __ARM64_KVM_ARM_H__ */
-- 
2.1.2.330.g565301e.dirty

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH V3 2/3] kvm: don't register wildcard MMIO EVENTFD on two buses

2015-08-30 Thread Jason Wang


On 08/26/2015 01:10 PM, Jason Wang wrote:
 On 08/25/2015 07:51 PM, Michael S. Tsirkin wrote:
  On Tue, Aug 25, 2015 at 05:05:47PM +0800, Jason Wang wrote:
   We register wildcard mmio eventfd on two buses, one for KVM_MMIO_BUS
   and another is KVM_FAST_MMIO_BUS. This leads to issue:
   
   - kvm_io_bus_destroy() knows nothing about the devices on two buses
  points to a single dev. Which will lead to a double free [1] during exit.
   - wildcard eventfd ignores data len, so it was registered as a
 kvm_io_range with zero length. This will fail the binary search in
 kvm_io_bus_get_first_dev() when we try to emulate through
 KVM_MMIO_BUS. This will cause userspace io emulation request instead
 of a eventfd notification (virtqueue kick will be trapped by qemu
 instead of vhost in this case).
   
   Fixing this by don't register wildcard mmio eventfd on two
   buses. Instead, only register it in KVM_FAST_MMIO_BUS. This fixes the
   double free issue of kvm_io_bus_destroy(). For the arch/setups that
   does not utilize KVM_FAST_MMIO_BUS, before searching KVM_MMIO_BUS, try
    KVM_FAST_MMIO_BUS first to see if it has a match.
   
   [1] Panic caused by double free:
   
   CPU: 1 PID: 2894 Comm: qemu-system-x86 Not tainted 3.19.0-26-generic 
   #28-Ubuntu
   Hardware name: LENOVO 2356BG6/2356BG6, BIOS G7ET96WW (2.56 ) 
   09/12/2013
   task: 88009ae0c4b0 ti: 88020e7f task.ti: 88020e7f
   RIP: 0010:[c07e25d8]  [c07e25d8] 
   ioeventfd_release+0x28/0x60 [kvm]
   RSP: 0018:88020e7f3bc8  EFLAGS: 00010292
   RAX: dead00200200 RBX: 8801ec19c900 RCX: 00018200016d
   RDX: 8801ec19cf80 RSI: ea0008bf1d40 RDI: 8801ec19c900
   RBP: 88020e7f3bd8 R08: 2fc75a01 R09: 00018200016d
   R10: c07df6ae R11: 88022fc75a98 R12: 88021e7cc000
   R13: 88021e7cca48 R14: 88021e7cca50 R15: 8801ec19c880
   FS:  7fc1ee3e6700() GS:88023e24() 
   knlGS:
   CS:  0010 DS:  ES:  CR0: 80050033
   CR2: 7f8f389d8000 CR3: 00023dc13000 CR4: 001427e0
   Stack:
   88021e7cc000  88020e7f3be8 c07e2622
   88020e7f3c38 c07df69a 880232524160 88020e792d80
 880219b78c00 0008 8802321686a8
   Call Trace:
   [c07e2622] ioeventfd_destructor+0x12/0x20 [kvm]
   [c07df69a] kvm_put_kvm+0xca/0x210 [kvm]
   [c07df818] kvm_vcpu_release+0x18/0x20 [kvm]
   [811f69f7] __fput+0xe7/0x250
   [811f6bae] fput+0xe/0x10
   [81093f04] task_work_run+0xd4/0xf0
   [81079358] do_exit+0x368/0xa50
   [81082c8f] ? recalc_sigpending+0x1f/0x60
   [81079ad5] do_group_exit+0x45/0xb0
   [81085c71] get_signal+0x291/0x750
   [810144d8] do_signal+0x28/0xab0
   [810f3a3b] ? do_futex+0xdb/0x5d0
   [810b7028] ? __wake_up_locked_key+0x18/0x20
   [810f3fa6] ? SyS_futex+0x76/0x170
   [81014fc9] do_notify_resume+0x69/0xb0
   [817cb9af] int_signal+0x12/0x17
   Code: 5d c3 90 0f 1f 44 00 00 55 48 89 e5 53 48 89 fb 48 83 ec 08 48 
   8b 7f 20 e8 06 d6 a5 c0 48 8b 43 08 48 8b 13 48 89 df 48 89 42 08 
   48 89 10 48 b8 00 01 10 00 00
   RIP  [c07e25d8] ioeventfd_release+0x28/0x60 [kvm]
   RSP 88020e7f3bc8
   
   Cc: Gleb Natapov g...@kernel.org
   Cc: Paolo Bonzini pbonz...@redhat.com
   Cc: Michael S. Tsirkin m...@redhat.com
   Signed-off-by: Jason Wang jasow...@redhat.com
   ---
   Changes from V2:
   - Tweak styles and comment suggested by Cornelia.
   Changes from v1:
   - change ioeventfd_bus_from_flags() to return KVM_FAST_MMIO_BUS when
 needed to save lots of unnecessary changes.
   ---
virt/kvm/eventfd.c  | 31 +--
virt/kvm/kvm_main.c | 16 ++--
2 files changed, 23 insertions(+), 24 deletions(-)
   
   diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
   index 9ff4193..c3ffdc3 100644
   --- a/virt/kvm/eventfd.c
   +++ b/virt/kvm/eventfd.c
   @@ -762,13 +762,16 @@ ioeventfd_check_collision(struct kvm *kvm, 
   struct _ioeventfd *p)
 return false;
}

   -static enum kvm_bus ioeventfd_bus_from_flags(__u32 flags)
   +static enum kvm_bus ioeventfd_bus_from_args(struct kvm_ioeventfd 
   *args)
{
   - if (flags  KVM_IOEVENTFD_FLAG_PIO)
   + if (args-flags  KVM_IOEVENTFD_FLAG_PIO)
 return KVM_PIO_BUS;
   - if (flags  KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
   + if (args-flags  KVM_IOEVENTFD_FLAG_VIRTIO_CCW_NOTIFY)
 return KVM_VIRTIO_CCW_NOTIFY_BUS;
   - return KVM_MMIO_BUS;
   + /* When length is ignored, MMIO is put on a separate bus, for
   +  * faster lookups.
   +  */
   + return args-len ? KVM_MMIO_BUS : KVM_FAST_MMIO_BUS;
}

static int
   @@ -779,7 +782,7 @@ kvm_assign_ioeventfd(struct kvm *kvm, struct 
   kvm_ioeventfd *args)
 

Fwd: Data buffer Transfer through Hypercall

2015-08-30 Thread Hu Yaohui
Hi All,
Does anyone know how to transfer data buffer through Hypercall?
According to the current implementation from kvm_emulate_hypercall,
it only takes a primitive type as parameters through different
registers. Can we use hyprecall like read/write system call to
transfer data between guest and hypervisor? Is virtio the best way to
communicate between guest and host at the moment? If that's the case,
which virtio device will be the best.

Thanks,
Yaohui
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC 1/3] vmx: allow ioeventfd for EPT violations

2015-08-30 Thread Xiao Guangrong



On 08/30/2015 05:12 PM, Michael S. Tsirkin wrote:

Even when we skip data decoding, MMIO is slightly slower
than port IO because it uses the page-tables, so the CPU
must do a pagewalk on each access.

This overhead is normally masked by using the TLB cache:
but not so for KVM MMIO, where PTEs are marked as reserved
and so are never cached.

As ioeventfd memory is never read, make it possible to use
RO pages on the host for ioeventfds, instead.


I like this idea.


The result is that TLBs are cached, which finally makes MMIO
as fast as port IO.


What does "TLBs are cached" mean? Even after applying the patch
no new TLB type can be cached.



Signed-off-by: Michael S. Tsirkin m...@redhat.com
---
  arch/x86/kvm/vmx.c | 5 +
  1 file changed, 5 insertions(+)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9d1bfd3..ed44026 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5745,6 +5745,11 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, 
GUEST_INTR_STATE_NMI);

gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+   if (!kvm_io_bus_write(vcpu, KVM_FAST_MMIO_BUS, gpa, 0, NULL)) {
+   skip_emulated_instruction(vcpu);
+   return 1;
+   }
+


I am afraid that the common page fault entry point is not a good place to do the
work. Would move it to kvm_handle_bad_page()? The difference is the workload of
fast_page_fault() is included but it's light enough and MMIO-exit should not be
very frequent, so i think it's okay.

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html