Re: [PATCH] KVM: x86: check ISR and TMR to construct eoi exit bitmap

2014-08-07 Thread Chen, Tiejun

On 2014/8/8 14:02, Yang Zhang wrote:

From: Yang Zhang 

The guest may mask the IOAPIC entry before issuing the EOI. In that case,
the EOI will not be intercepted by the hypervisor because the corresponding
bit in the EOI exit bitmap is not set.

The solution is to check ISR + TMR to construct the EOI exit bitmap.

This patch is a better fix for the issue that commit "0f6c0a740b"
tries to solve.

Signed-off-by: Yang Zhang 
---
  arch/x86/kvm/lapic.c |   22 ++
  arch/x86/kvm/lapic.h |2 ++
  arch/x86/kvm/x86.c   |9 +
  virt/kvm/ioapic.c|7 ---
  4 files changed, 37 insertions(+), 3 deletions(-)

hi, alex

This patch is not tested since i don't have the environment to do it. Can you
help to test it? thanks.

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a89..d2f9a6e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,6 +71,11 @@
  #define VEC_POS(v) ((v) & (32 - 1))
  #define REG_POS(v) (((v) >> 5) << 4)

+static inline u32 apic_read_reg(struct kvm_lapic *apic, int reg_off)
+{
+   return *((u32 *) (apic->regs + reg_off));
+}
+


I think we already define the same in the arch/x86/kvm/lapic.h file,

static inline u32 kvm_apic_get_reg(struct kvm_lapic *apic, int reg_off)
{
return *((u32 *) (apic->regs + reg_off));
}

Thanks
Tiejun


  static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
  {
*((u32 *) (apic->regs + reg_off)) = val;
@@ -515,6 +520,23 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
  }

+void kvm_apic_zap_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+   u32 *tmr)
+{
+   u32 i, reg_off, intr_in_service[8];
+   struct kvm_lapic *apic = vcpu->arch.apic;
+
+   for (i = 0; i < 8; i++) {
+   reg_off = 0x10 * i;
+   intr_in_service[i] = apic_read_reg(apic, APIC_ISR + reg_off) &
+   apic_read_reg(apic, APIC_TMR + reg_off);
+   if (intr_in_service[i]) {
+   *((u32 *)eoi_exit_bitmap + i) |= intr_in_service[i];
+   tmr[i] |= intr_in_service[i];
+   }
+   }
+}
+
  void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
  {
struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..4ee3d70 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -53,6 +53,8 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
  u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
  void kvm_apic_set_version(struct kvm_vcpu *vcpu);

+void kvm_apic_zap_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+   u32 *tmr);
  void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
  void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
  int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 204422d..755b556 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6005,6 +6005,15 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
memset(tmr, 0, 32);

kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
+   /*
+* Guest may mask the IOAPIC entry before issue EOI. In such case,
+* EOI will not be intercepted by hypervisor due to the corrensponding
+* bit in eoi exit bitmap is not setting.
+*
+* The solution is to check ISR + TMR to construct the EOI exit bitmap.
+*/
+   kvm_apic_zap_eoi_exitmap(vcpu, eoi_exit_bitmap, tmr);
+
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
kvm_apic_update_tmr(vcpu, tmr);
  }
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c..2458a1d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -254,9 +254,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 
*eoi_exit_bitmap,
spin_lock(&ioapic->lock);
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
e = &ioapic->redirtbl[index];
-   if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
-   kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, 
index) ||
-   index == RTC_GSI) {
+   if (!e->fields.mask &&
+   (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
+index) || index == RTC_GSI)) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode)) {
__set_bit(e->fields.vector,


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] KVM: x86: check ISR and TMR to construct eoi exit bitmap

2014-08-07 Thread Yang Zhang
From: Yang Zhang 

The guest may mask the IOAPIC entry before issuing the EOI. In that case,
the EOI will not be intercepted by the hypervisor because the corresponding
bit in the EOI exit bitmap is not set.

The solution is to check ISR + TMR to construct the EOI exit bitmap.

This patch is a better fix for the issue that commit "0f6c0a740b"
tries to solve.

Signed-off-by: Yang Zhang 
---
 arch/x86/kvm/lapic.c |   22 ++
 arch/x86/kvm/lapic.h |2 ++
 arch/x86/kvm/x86.c   |9 +
 virt/kvm/ioapic.c|7 ---
 4 files changed, 37 insertions(+), 3 deletions(-)

hi, alex

This patch is not tested since i don't have the environment to do it. Can you
help to test it? thanks.

diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index 08e8a89..d2f9a6e 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -71,6 +71,11 @@
 #define VEC_POS(v) ((v) & (32 - 1))
 #define REG_POS(v) (((v) >> 5) << 4)
 
+static inline u32 apic_read_reg(struct kvm_lapic *apic, int reg_off)
+{
+   return *((u32 *) (apic->regs + reg_off));
+}
+
 static inline void apic_set_reg(struct kvm_lapic *apic, int reg_off, u32 val)
 {
*((u32 *) (apic->regs + reg_off)) = val;
@@ -515,6 +520,23 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
__clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention);
 }
 
+void kvm_apic_zap_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+   u32 *tmr)
+{
+   u32 i, reg_off, intr_in_service[8];
+   struct kvm_lapic *apic = vcpu->arch.apic;
+
+   for (i = 0; i < 8; i++) {
+   reg_off = 0x10 * i;
+   intr_in_service[i] = apic_read_reg(apic, APIC_ISR + reg_off) &
+   apic_read_reg(apic, APIC_TMR + reg_off);
+   if (intr_in_service[i]) {
+   *((u32 *)eoi_exit_bitmap + i) |= intr_in_service[i];
+   tmr[i] |= intr_in_service[i];
+   }
+   }
+}
+
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr)
 {
struct kvm_lapic *apic = vcpu->arch.apic;
diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h
index 6a11845..4ee3d70 100644
--- a/arch/x86/kvm/lapic.h
+++ b/arch/x86/kvm/lapic.h
@@ -53,6 +53,8 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
 u64 kvm_lapic_get_base(struct kvm_vcpu *vcpu);
 void kvm_apic_set_version(struct kvm_vcpu *vcpu);
 
+void kvm_apic_zap_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap,
+   u32 *tmr);
 void kvm_apic_update_tmr(struct kvm_vcpu *vcpu, u32 *tmr);
 void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir);
 int kvm_apic_match_physical_addr(struct kvm_lapic *apic, u16 dest);
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 204422d..755b556 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6005,6 +6005,15 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu)
memset(tmr, 0, 32);
 
kvm_ioapic_scan_entry(vcpu, eoi_exit_bitmap, tmr);
+   /*
+* Guest may mask the IOAPIC entry before issue EOI. In such case,
+* EOI will not be intercepted by hypervisor due to the corrensponding
+* bit in eoi exit bitmap is not setting.
+*
+* The solution is to check ISR + TMR to construct the EOI exit bitmap.
+*/
+   kvm_apic_zap_eoi_exitmap(vcpu, eoi_exit_bitmap, tmr);
+
kvm_x86_ops->load_eoi_exitmap(vcpu, eoi_exit_bitmap);
kvm_apic_update_tmr(vcpu, tmr);
 }
diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index e8ce34c..2458a1d 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -254,9 +254,10 @@ void kvm_ioapic_scan_entry(struct kvm_vcpu *vcpu, u64 
*eoi_exit_bitmap,
spin_lock(&ioapic->lock);
for (index = 0; index < IOAPIC_NUM_PINS; index++) {
e = &ioapic->redirtbl[index];
-   if (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
-   kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC, 
index) ||
-   index == RTC_GSI) {
+   if (!e->fields.mask &&
+   (e->fields.trig_mode == IOAPIC_LEVEL_TRIG ||
+kvm_irq_has_notifier(ioapic->kvm, KVM_IRQCHIP_IOAPIC,
+index) || index == RTC_GSI)) {
if (kvm_apic_match_dest(vcpu, NULL, 0,
e->fields.dest_id, e->fields.dest_mode)) {
__set_bit(e->fields.vector,
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 0/4 resend] Introduce device assignment flag operation helper function

2014-08-07 Thread Ethan Zhao
This patch set introduces three PCI device flag operation helper functions
for setting a PCI device (PF/VF) to assigned or deassigned status and for
checking that status. Patches 2, 3 and 4 apply these helper functions to
KVM, Xen and PCI.

v2: simplify unnecessary ternary operation in function pci_is_dev_assigned().
v3: amend helper function naming.

Appreciate suggestion from
alex.william...@redhat.com,
david.vra...@citrix.com,
alexander.h.du...@intel.com

Resend for v3.16 building.

Thanks,
Ethan
---
Ethan Zhao (4):
  PCI: introduce helper functions for device flag operation
  KVM: use pci device flag operation helper functions
  xen-pciback: use pci device flag operation helper function
  PCI: use device flag operation helper function in iov.c

 drivers/pci/iov.c  |2 +-
 drivers/xen/xen-pciback/pci_stub.c |4 ++--
 include/linux/pci.h|   13 +
 virt/kvm/assigned-dev.c|2 +-
 virt/kvm/iommu.c   |4 ++--
 5 files changed, 19 insertions(+), 6 deletions(-)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 72381] [Nested] L1 call trace when create windows 7 guest as L2 guest.

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=72381

robert...@intel.com changed:

   What|Removed |Added

 Status|RESOLVED|VERIFIED

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 72381] [Nested] L1 call trace when create windows 7 guest as L2 guest.

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=72381

robert...@intel.com changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |CODE_FIX

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/4] xen-pciback: use pci device flag operation helper function

2014-08-07 Thread Ethan Zhao
Use pci device flag operation helper functions when set device
to assigned or deassigned state.

Acked-by: David Vrabel 
Reviewed-by: Konrad Rzeszutek Wilk 
Signed-off-by: Ethan Zhao 
---
 drivers/xen/xen-pciback/pci_stub.c |4 ++--
 1 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/xen/xen-pciback/pci_stub.c 
b/drivers/xen/xen-pciback/pci_stub.c
index d57a173..e593921 100644
--- a/drivers/xen/xen-pciback/pci_stub.c
+++ b/drivers/xen/xen-pciback/pci_stub.c
@@ -133,7 +133,7 @@ static void pcistub_device_release(struct kref *kref)
xen_pcibk_config_free_dyn_fields(dev);
xen_pcibk_config_free_dev(dev);
 
-   dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+   pci_clear_dev_assigned(dev);
pci_dev_put(dev);
 
kfree(psdev);
@@ -413,7 +413,7 @@ static int pcistub_init_device(struct pci_dev *dev)
dev_dbg(&dev->dev, "reset device\n");
xen_pcibk_reset_device(dev);
 
-   dev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+   pci_set_dev_assigned(dev);
return 0;
 
 config_release:
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/4] PCI: introduce helper functions for device flag operation

2014-08-07 Thread Ethan Zhao
This patch introduced three helper functions to hide direct
device flag operation.

void pci_set_dev_assigned(struct pci_dev *pdev);
void pci_clear_dev_assigned(struct pci_dev *pdev);
bool pci_is_dev_assigned(struct pci_dev *pdev);

Signed-off-by: Ethan Zhao 
---
 v2: simplify unnecessary ternary operation in function pci_is_dev_assigned();
 v3: amend helper functions naming.
---
 include/linux/pci.h |   13 +
 1 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/include/linux/pci.h b/include/linux/pci.h
index 466bcd1..b610ab3 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -1829,4 +1829,17 @@ int pci_for_each_dma_alias(struct pci_dev *pdev,
  */
 struct pci_dev *pci_find_upstream_pcie_bridge(struct pci_dev *pdev);
 
+/* helper functions for operation of device flag */
+static inline void pci_set_dev_assigned(struct pci_dev *pdev)
+{
+   pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+}
+static inline void pci_clear_dev_assigned(struct pci_dev *pdev)
+{
+   pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+}
+static inline bool pci_is_dev_assigned(struct pci_dev *pdev)
+{
+   return pdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED;
+}
 #endif /* LINUX_PCI_H */
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/4] KVM: use pci device flag operation helper functions

2014-08-07 Thread Ethan Zhao
Use helper function instead of direct operation to pci device
flag when set device to assigned or deassigned.

Acked-by: Paolo Bonzini 
Signed-off-by: Ethan Zhao 
---
 virt/kvm/assigned-dev.c |2 +-
 virt/kvm/iommu.c|4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index bf06577..38581ee 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -302,7 +302,7 @@ static void kvm_free_assigned_device(struct kvm *kvm,
else
pci_restore_state(assigned_dev->dev);
 
-   assigned_dev->dev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+   pci_clear_dev_assigned(assigned_dev->dev);
 
pci_release_regions(assigned_dev->dev);
pci_disable_device(assigned_dev->dev);
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 0df7d4b..34a8b02 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -194,7 +194,7 @@ int kvm_assign_device(struct kvm *kvm,
goto out_unmap;
}
 
-   pdev->dev_flags |= PCI_DEV_FLAGS_ASSIGNED;
+   pci_set_dev_assigned(pdev);
 
dev_info(&pdev->dev, "kvm assign device\n");
 
@@ -220,7 +220,7 @@ int kvm_deassign_device(struct kvm *kvm,
 
iommu_detach_device(domain, &pdev->dev);
 
-   pdev->dev_flags &= ~PCI_DEV_FLAGS_ASSIGNED;
+   pci_clear_dev_assigned(pdev);
 
dev_info(&pdev->dev, "kvm deassign device\n");
 
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 4/4] PCI: use device flag operation helper function in iov.c

2014-08-07 Thread Ethan Zhao
Use device flag operation helper functions when check device
assignment status.

Signed-off-by: Ethan Zhao 
---
 drivers/pci/iov.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index cb6f247..4d109c0 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -633,7 +633,7 @@ int pci_vfs_assigned(struct pci_dev *dev)
 * our dev as the physical function and the assigned bit is set
 */
if (vfdev->is_virtfn && (vfdev->physfn == dev) &&
-   (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED))
+   pci_is_dev_assigned(vfdev))
vfs_assigned++;
 
vfdev = pci_get_device(dev->vendor, dev_id, vfdev);
-- 
1.7.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 72381] [Nested] L1 call trace when create windows 7 guest as L2 guest.

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=72381

--- Comment #4 from Zhou, Chao  ---
this commit fixed the bug:
commit 9242b5b60df8b13b469bc6b7be08ff6ebb551ad3
Author: Bandan Das 
Date:   Tue Jul 8 00:30:23 2014 -0400

KVM: x86: Check for nested events if there is an injectable interrupt

With commit b6b8a1451fc40412c57d1 that introduced
vmx_check_nested_events, checks for injectable interrupts happen
at different points in time for L1 and L2 that could potentially
cause a race. The regression occurs because KVM_REQ_EVENT is always
set when nested_run_pending is set even if there's no pending interrupt.
Consequently, there could be a small window when check_nested_events
returns without exiting to L1, but an interrupt comes through soon
after and it incorrectly, gets injected to L2 by inject_pending_event
Fix this by adding a call to check for nested events too when a check
for injectable interrupt returns true

Signed-off-by: Bandan Das 
Signed-off-by: Paolo Bonzini 

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 72381] [Nested] L1 call trace when create windows 7 guest as L2 guest.

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=72381

Zhou, Chao  changed:

   What|Removed |Added

 CC||chao.z...@intel.com

--- Comment #3 from Zhou, Chao  ---
kvm.git + qemu.git:c77dcacb_69f87f71
kernel version: 3.16.0
test on Romley_EP, when create a windows 7 guest as L2 guest,L2/L1 work fine.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] kvm: x86: fix stale mmio cache bug

2014-08-07 Thread David Matlack
On Thu, Aug 7, 2014 at 6:36 PM, Xiao Guangrong
 wrote:
> On 08/08/2014 02:32 AM, David Matlack wrote:
>> The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
>> up to userspace:
>>
>> (1) Guest accesses gpa X without a memory slot. The gfn is cached in
>> struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
>> the SPTE write-execute-noread so that future accesses cause
>> EPT_MISCONFIGs.
>>
>> (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
>> covering the page just accessed.
>>
>> (3) Guest attempts to read or write to gpa X again. On Intel, this
>> generates an EPT_MISCONFIG. The memory slot generation number that
>> was incremented in (2) would normally take care of this but we fast
>> path mmio faults through quickly_check_mmio_pf(), which only checks
>> the per-vcpu mmio cache. Since we hit the cache, KVM passes a
>> KVM_EXIT_MMIO up to userspace.
>>
>> This patch fixes the issue by using the memslot generation number
>> to validate the mmio cache.
>>
>> Signed-off-by: David Matlack 
>> ---
>> The patch diff is rather large because I had to pull some code out
>> of x86.h and mmu.c and into mmu.h. The main change is adding the
>> memslot generation in vcpu_cache_mmio_info() and then validating
>> that slot in vcpu_match_mmio_*().
>
> Why not just move vcpu_cache_mmio_info() into mmu.c, which is
> the only place vcpu_cache_mmio_info() is called? :)

If only it was so simple :). vcpu_match_mmio_*() now needs
kvm_current_mmio_generation() which *was* in mmu.c.
vcpu_match_mmio_*() is called from x86.c and mmu.c so all that
has to go in a header. I decided to keep vcpu_cache_mmio_info()
with the  rest for organization's sake.

> BTW, i will post a patch to fix the generation-number issue
> soon.
>
>

Great! Thanks.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 2/5] powerpc/eeh: Add warning message in eeh_dev_open()

2014-08-07 Thread Alex Williamson
On Fri, 2014-08-08 at 13:50 +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2014-08-07 at 12:47 +1000, Gavin Shan wrote:
> > The patch adds one warning message in eeh_dev_open() in case the
> > PCI device can't be marked as passed through.
> > 
> > Suggested-by: Alexey Kardashevskiy 
> > Signed-off-by: Gavin Shan 
> > ---
> 
> Acked-by: Benjamin Herrenschmidt 
> 
> Alex, are you taking this or should I ?

You should take 1 & 2, AFAICT there's no dependencies between anything
in this series.  Thanks,

Alex

> >  arch/powerpc/kernel/eeh.c | 5 -
> >  1 file changed, 4 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> > index 59a64f8..5d73a49 100644
> > --- a/arch/powerpc/kernel/eeh.c
> > +++ b/arch/powerpc/kernel/eeh.c
> > @@ -1162,8 +1162,11 @@ int eeh_dev_open(struct pci_dev *pdev)
> >  
> > /* No EEH device or PE ? */
> > edev = pci_dev_to_eeh_dev(pdev);
> > -   if (!edev || !edev->pe)
> > +   if (!edev || !edev->pe) {
> > +   pr_warn_once("%s: PCI device %s not supported\n",
> > +__func__, pci_name(pdev));
> > goto out;
> > +   }
> >  
> > /* Increase PE's pass through count */
> > atomic_inc(&edev->pe->pass_dev_cnt);
> 
> 



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 2/5] powerpc/eeh: Add warning message in eeh_dev_open()

2014-08-07 Thread Benjamin Herrenschmidt
On Thu, 2014-08-07 at 12:47 +1000, Gavin Shan wrote:
> The patch adds one warning message in eeh_dev_open() in case the
> PCI device can't be marked as passed through.
> 
> Suggested-by: Alexey Kardashevskiy 
> Signed-off-by: Gavin Shan 
> ---

Acked-by: Benjamin Herrenschmidt 

Alex, are you taking this or should I ?

>  arch/powerpc/kernel/eeh.c | 5 -
>  1 file changed, 4 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 59a64f8..5d73a49 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1162,8 +1162,11 @@ int eeh_dev_open(struct pci_dev *pdev)
>  
>   /* No EEH device or PE ? */
>   edev = pci_dev_to_eeh_dev(pdev);
> - if (!edev || !edev->pe)
> + if (!edev || !edev->pe) {
> + pr_warn_once("%s: PCI device %s not supported\n",
> +  __func__, pci_name(pdev));
>   goto out;
> + }
>  
>   /* Increase PE's pass through count */
>   atomic_inc(&edev->pe->pass_dev_cnt);


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Qemu-devel] [questions] about KVM as a Microsoft-compatible hypervisor

2014-08-07 Thread Zhang Haoyu
>> >Hi Zhang,
>> >
>> >No I haven't seen such problem
>> >Which kernel version are you running?
>> Host kernel: RHEL7-RC1(linux-3.10.0).
>> 
>> >Does it include the latest lazy eli changes?
>> >
>> lazy eli or lazy eoi?
>EOI
>> How to confirm whether lazy eli has been included?
>> 
>not in linux-3.10.0
So, do you mean hv_vapic need the support of lazy eoi?

>> >Btw, hv_spinlocks=0xfff is a pretty huge value.
>> >
>> which value do you advise to use?
>MS seems to be using 0x as a default.
>best regards,
>Vadim.
>> 
>> Thanks,
>> Zhang Haoyu
>> >Best regards,
>> >Vadim.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v4 1/5] powerpc/eeh: Export eeh_iommu_group_to_pe()

2014-08-07 Thread Benjamin Herrenschmidt
On Thu, 2014-08-07 at 12:47 +1000, Gavin Shan wrote:
> The function is used by VFIO driver, which might be built as a
> dynamic module. So it should be exported.
> 
> Signed-off-by: Gavin Shan 

Acked-by: Benjamin Herrenschmidt 

Alex, are you taking this or should I ?

> ---
>  arch/powerpc/kernel/eeh.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
> index 6043879..59a64f8 100644
> --- a/arch/powerpc/kernel/eeh.c
> +++ b/arch/powerpc/kernel/eeh.c
> @@ -1254,6 +1254,7 @@ struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group 
> *group)
>  
>   return edev->pe;
>  }
> +EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);
>  
>  #endif /* CONFIG_IOMMU_API */
>  


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

Joel Schopp  changed:

   What|Removed |Added

 CC||joel.sch...@amd.com

--- Comment #10 from Joel Schopp  ---
(In reply to Alex Williamson from comment #9)
> (In reply to Marti Raudsepp from comment #8)
> > (In reply to Alex Williamson from comment #7)
> > > > There are some proposed workarounds on the web
> > > None of these remotely address the issue.
> > 
> > I see. This page claims so: 
> > http://www.ovirt.org/Features/hostdev_passthrough
> 
> Sorry, it's wrong.
> 
> > > there are quirks for the following AMD southbridge components
> > 
> > Nope, mine are 1022:780b, 1022:780c, 1022:780d, 1022:780e, 1022:780f,
> > 1022:7809
> > 
> > > If your bridge does not match these, then AMD will need to confirm whether
> > > isolation is provided between your devices.
> > 
> > How would I go about confirming that? What are the chances that they care,
> > and provide accurate information to a random person?


Are you suggesting we'd provide inaccurate information to a random person?


> 
> AMD would need to confirm it.  IOMMU groups are based on hardware advertised
> isolation via the PCIe ACS capability.  Without this, or a device specific
> quirk to take its place, IOMMU groups must assume that peer-to-peer between
> functions of a multi-function device is possible and therefore that the
> devices are not isolated.  Chances are that this new chipset in your system
> is taking the exact same ASICs that were deemed not to do peer-to-peer on
> previous chipsets, but we need that confirmation from AMD.  Alex Deucher
> (see MAINTAINERS) may have contacts available that can make that statement.

I don't have an answer for you offhand.  Let me do some digging and get you an
answer.

> 
> > > There is an ACS override patch
> > 
> > I already ran across it...
> > https://bugzilla.redhat.com/show_bug.cgi?id=1113399
> > Would I be any worse off using this, compared to the old kvm pci-assign
> > method?
> 
> I think the path forward is to get confirmation from AMD that these function
> are isolated from each other and add quirks to the kernel.  Then you won't
> have the device dependencies in vfio-pci.  The override patch allows you to
> do that with just a kernel boot parameter.  There's no guarantee that
> pci-assign will ever be fixed since it's being phased out.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] kvm: x86: fix stale mmio cache bug

2014-08-07 Thread Xiao Guangrong
On 08/08/2014 02:32 AM, David Matlack wrote:
> The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
> up to userspace:
> 
> (1) Guest accesses gpa X without a memory slot. The gfn is cached in
> struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
> the SPTE write-execute-noread so that future accesses cause
> EPT_MISCONFIGs.
> 
> (2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
> covering the page just accessed.
> 
> (3) Guest attempts to read or write to gpa X again. On Intel, this
> generates an EPT_MISCONFIG. The memory slot generation number that
> was incremented in (2) would normally take care of this but we fast
> path mmio faults through quickly_check_mmio_pf(), which only checks
> the per-vcpu mmio cache. Since we hit the cache, KVM passes a
> KVM_EXIT_MMIO up to userspace.
> 
> This patch fixes the issue by using the memslot generation number
> to validate the mmio cache.
> 
> Signed-off-by: David Matlack 
> ---
> The patch diff is rather large because I had to pull some code out
> of x86.h and mmu.c and into mmu.h. The main change is adding the
> memslot generation in vcpu_cache_mmio_info() and then validating
> that slot in vcpu_match_mmio_*().

Why not just move vcpu_cache_mmio_info() into mmu.c, which is
the only place vcpu_cache_mmio_info() is called? :)

BTW, i will post a patch to fix the generation-number issue
soon.


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Christoffer Dall
On Thu, Aug 07, 2014 at 01:06:09PM -0500, Wei Huang wrote:
> 
> 
> On 08/07/2014 12:53 PM, Christoffer Dall wrote:
> >Currently we only model a virtual machine board (the -machine
> >type=virt parameter) which has a UART, a flash, an RTC, and a bunch of
> >virtio-mmio channels.
> >
> >Once we either emulate a real aarch64 board (with whatever peripherals
> >it may have) or add a PCI controller to the virt board, then you can
> Out of curiosity, any particular of PCI controller in your mind? I
> have seen people using Synopsys IP in real SOC; but wonder if
> anything else?
> 

We are looking at adding a generic PCI controller (which is what Alvise
is also working on).  Will Deacon did the original work for ARM and
Liviu Dudau is doing the ARM64 version (look in lakml).

-Christoffer
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #9 from Alex Williamson  ---
(In reply to Marti Raudsepp from comment #8)
> (In reply to Alex Williamson from comment #7)
> > > There are some proposed workarounds on the web
> > None of these remotely address the issue.
> 
> I see. This page claims so: http://www.ovirt.org/Features/hostdev_passthrough

Sorry, it's wrong.

> > there are quirks for the following AMD southbridge components
> 
> Nope, mine are 1022:780b, 1022:780c, 1022:780d, 1022:780e, 1022:780f,
> 1022:7809
> 
> > If your bridge does not match these, then AMD will need to confirm whether
> > isolation is provided between your devices.
> 
> How would I go about confirming that? What are the chances that they care,
> and provide accurate information to a random person?

AMD would need to confirm it.  IOMMU groups are based on hardware advertised
isolation via the PCIe ACS capability.  Without this, or a device specific
quirk to take its place, IOMMU groups must assume that peer-to-peer between
functions of a multi-function device is possible and therefore that the devices
are not isolated.  Chances are that this new chipset in your system is taking
the exact same ASICs that were deemed not to do peer-to-peer on previous
chipsets, but we need that confirmation from AMD.  Alex Deucher (see
MAINTAINERS) may have contacts available that can make that statement.

> > There is an ACS override patch
> 
> I already ran across it...
> https://bugzilla.redhat.com/show_bug.cgi?id=1113399
> Would I be any worse off using this, compared to the old kvm pci-assign
> method?

I think the path forward is to get confirmation from AMD that these function
are isolated from each other and add quirks to the kernel.  Then you won't have
the device dependencies in vfio-pci.  The override patch allows you to do that
with just a kernel boot parameter.  There's no guarantee that pci-assign will
ever be fixed since it's being phased out.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #8 from Marti Raudsepp  ---
(In reply to Alex Williamson from comment #7)
> > There are some proposed workarounds on the web
> None of these remotely address the issue.

I see. This page claims so: http://www.ovirt.org/Features/hostdev_passthrough

> there are quirks for the following AMD southbridge components

Nope, mine are 1022:780b, 1022:780c, 1022:780d, 1022:780e, 1022:780f, 1022:7809

> If your bridge does not match these, then AMD will need to confirm whether
> isolation is provided between your devices.

How would I go about confirming that? What are the chances that they care, and
provide accurate information to a random person?

> There is an ACS override patch

I already ran across it... https://bugzilla.redhat.com/show_bug.cgi?id=1113399
Would I be any worse off using this, compared to the old kvm pci-assign method?

> Note that it's not required to assign all the devices, they simply need to
> be detached from host drivers (ie. bound to pci-stub or vfio-pci).

Thanks, I will give it a shot tomorrow.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3] kvm: x86: fix stale mmio cache bug

2014-08-07 Thread David Matlack
The following events can lead to an incorrect KVM_EXIT_MMIO bubbling
up to userspace:

(1) Guest accesses gpa X without a memory slot. The gfn is cached in
struct kvm_vcpu_arch (mmio_gfn). On Intel EPT-enabled hosts, KVM sets
the SPTE write-execute-noread so that future accesses cause
EPT_MISCONFIGs.

(2) Host userspace creates a memory slot via KVM_SET_USER_MEMORY_REGION
covering the page just accessed.

(3) Guest attempts to read or write to gpa X again. On Intel, this
generates an EPT_MISCONFIG. The memory slot generation number that
was incremented in (2) would normally take care of this but we fast
path mmio faults through quickly_check_mmio_pf(), which only checks
the per-vcpu mmio cache. Since we hit the cache, KVM passes a
KVM_EXIT_MMIO up to userspace.

This patch fixes the issue by using the memslot generation number
to validate the mmio cache.

Signed-off-by: David Matlack 
---
The patch diff is rather large because I had to pull some code out
of x86.h and mmu.c and into mmu.h. The main change is adding the
memslot generation in vcpu_cache_mmio_info() and then validating
that slot in vcpu_match_mmio_*().

Changes in v3:
  - remove memory barrier in vcpu_cache_mmio_info()
  - don't unconditionally clear mmio cache in mmu_sync_roots

Changes in v2:
  - Use memslot generation to invalidate the mmio cache rather than
actively invalidating the cache.
  - Update patch description with new cache invalidation technique.
  - Pull mmio cache/clear code up out of x86.h and mmu.c and into
mmu.h.

 arch/x86/include/asm/kvm_host.h |  1 +
 arch/x86/kvm/mmu.c  | 15 +--
 arch/x86/kvm/mmu.h  | 58 +
 arch/x86/kvm/x86.h  | 36 -
 4 files changed, 60 insertions(+), 50 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 49205d0..f518d14 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -479,6 +479,7 @@ struct kvm_vcpu_arch {
u64 mmio_gva;
unsigned access;
gfn_t mmio_gfn;
+   unsigned int mmio_gen;
 
struct kvm_pmu pmu;
 
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 9314678..dd5a30d 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -206,11 +206,8 @@ EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask);
 #define MMIO_SPTE_GEN_LOW_SHIFT3
 #define MMIO_SPTE_GEN_HIGH_SHIFT   52
 
-#define MMIO_GEN_SHIFT 19
 #define MMIO_GEN_LOW_SHIFT 9
 #define MMIO_GEN_LOW_MASK  ((1 << MMIO_GEN_LOW_SHIFT) - 1)
-#define MMIO_GEN_MASK  ((1 << MMIO_GEN_SHIFT) - 1)
-#define MMIO_MAX_GEN   ((1 << MMIO_GEN_SHIFT) - 1)
 
 static u64 generation_mmio_spte_mask(unsigned int gen)
 {
@@ -234,16 +231,6 @@ static unsigned int get_mmio_spte_generation(u64 spte)
return gen;
 }
 
-static unsigned int kvm_current_mmio_generation(struct kvm *kvm)
-{
-   /*
-* Init kvm generation close to MMIO_MAX_GEN to easily test the
-* code of handling generation number wrap-around.
-*/
-   return (kvm_memslots(kvm)->generation +
- MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
-}
-
 static void mark_mmio_spte(struct kvm *kvm, u64 *sptep, u64 gfn,
   unsigned access)
 {
@@ -3163,7 +3150,7 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu)
if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
return;
 
-   vcpu_clear_mmio_info(vcpu, ~0ul);
+   vcpu_clear_mmio_info(vcpu, MMIO_GVA_ANY);
kvm_mmu_audit(vcpu, AUDIT_PRE_SYNC);
if (vcpu->arch.mmu.root_level == PT64_ROOT_LEVEL) {
hpa_t root = vcpu->arch.mmu.root_hpa;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b982112..a98a060 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -82,6 +82,64 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, struct 
kvm_mmu *context,
 void update_permission_bitmask(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
bool ept);
 
+#define MMIO_GEN_SHIFT 19
+#define MMIO_GEN_MASK  ((1 << MMIO_GEN_SHIFT) - 1)
+#define MMIO_MAX_GEN   ((1 << MMIO_GEN_SHIFT) - 1)
+static inline unsigned int kvm_current_mmio_generation(struct kvm *kvm)
+{
+   /*
+* Init kvm generation close to MMIO_MAX_GEN to easily test the
+* code of handling generation number wrap-around.
+*/
+   return (kvm_memslots(kvm)->generation +
+   MMIO_MAX_GEN - 150) & MMIO_GEN_MASK;
+}
+
+static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu,
+   gva_t gva, gfn_t gfn, unsigned access)
+{
+   vcpu->arch.mmio_gen = kvm_current_mmio_generation(vcpu->kvm);
+   vcpu->arch.mmio_gva = gva & PAGE_MASK;
+   vcpu->arch.access = access;
+   vcpu->arch.mmio_gfn = gfn;
+}
+
+/*
+ * Clear the mmio cac

looking for info on TSC virtualization with kvm

2014-08-07 Thread Chris Friesen
I'm trying to find out some hard data on TSC virtualization when using 
qemu-kvm to run linux guests on Intel-based linux 3.4 hosts.


I've read 
"https://www.kernel.org/doc/Documentation/virtual/kvm/timekeeping.txt" 
and looked at the code somewhat, but it's a bit tricky to figure out 
exactly what will happen.


I'm using kvm_clock for the guest kernel clocksource, but I'm concerned 
about applications in the guest accessing the TSC directly.


My main concern is what happens during live migration to a host with a 
faster TSC frequency (even though both hosts may have nonstop constant 
TSCs).


It appears that VMX provides hardware virtualization of the TSC to allow 
an offset to be applied, but no support for frequency scaling.  Under 
what circumstances would KVM use the hardware virtualized TSC?  What 
would happen on migration to a host with faster TSC?


Does KVM support a fully-emulated software TSC?  If so, under what 
circumstances would it be used and can it handle frequency shifts and 
offsets over live migration?


Lastly...I found a patch series by Zachary Amsden at 
"http://thread.gmane.org/gmane.linux.kernel/1157689" that doesn't seem 
to have ever made it in to the mainline kernel...why not?


Thanks,
Chris
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Joel Schopp


On 08/07/2014 12:53 PM, Christoffer Dall wrote:

Currently we only model a virtual machine board (the -machine
type=virt parameter) which has a UART, a flash, an RTC, and a bunch of
virtio-mmio channels.

Once we either emulate a real aarch64 board (with whatever peripherals
it may have) or add a PCI controller to the virt board, then you can
choose whatever storage the real board has or start doing interesting
things like plugging in a scsi controller to your PCI controller on
the virt board or whatever else you desire.
I am very interested in having a PCI controller on the virt board to be 
able to do some testing of "-device pci-assign" and "-device vfio-pci".  
I noticed that Alvise Rigo (cc'd) had sent some patches out to the 
qemu-devel list July 11th that seem to add a generic pci controller.




But as Joel points out, VirtIO is likely to get you the best
performance and is the most convenient method.

-Christoffer

On Thu, Aug 7, 2014 at 6:51 PM, Mathew Li  wrote:

Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk to qemu-system-aarch64? For example IDE disk or SATA disk or
maybe as a SCSI disk?



--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #7 from Alex Williamson  ---
(In reply to Marti Raudsepp from comment #6)
> (In reply to Alex Williamson from comment #5)
> > What if you use vfio-pci instead of pci-assign?
> 
> I run into the dreaded error:
>   vfio: error, group 9 is not viable, please ensure all devices within the
>   iommu_group are bound to their vfio bus driver
> 
> There are some proposed workarounds on the web, like passing
> vfio_iommu_type1.allow_unsafe_interrupts=1 or pci=realloc, but these seem to
> change nothing for me.

None of these remotely address the issue.  If you're running at least 3.12
there are quirks for the following AMD southbridge components:

 * 1002:4385 SBx00 SMBus Controller
 * 1002:439c SB7x0/SB8x0/SB9x0 IDE Controller
 * 1002:4383 SBx00 Azalia (Intel HDA)
 * 1002:439d SB7x0/SB8x0/SB9x0 LPC host controller
 * 1002:4384 SBx00 PCI to PCI Bridge
 * 1002:4399 SB7x0/SB8x0/SB9x0 USB OHCI2 Controller

If your bridge does not match these, then AMD will need to confirm whether
isolation is provided between your devices.  There is an ACS override patch
floating around which allows assuming device isolation, but this is generally a
bad idea, can introduce obscure bugs, and will not be merged upstream.

> So I tried adding all the PCI devices in the IOMMU group as passthrough
> devices (including IDE, SMBus, audio and OHCI controllers). But then QEMU's
> SeaBIOS gets so confused it can no longer find a hard drive to boot off.

Note that it's not required to assign all the devices, they simply need to be
detached from host drivers (ie. bound to pci-stub or vfio-pci).

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Wei Huang



On 08/07/2014 12:53 PM, Christoffer Dall wrote:

Currently we only model a virtual machine board (the -machine
type=virt parameter) which has a UART, a flash, an RTC, and a bunch of
virtio-mmio channels.

Once we either emulate a real aarch64 board (with whatever peripherals
it may have) or add a PCI controller to the virt board, then you can
Out of curiosity, do you have any particular PCI controller in mind? I have 
seen people using Synopsys IP in real SoCs, but I wonder if there is anything else.


-Wei


choose whatever storage the real board has or start doing interesting
things like plugging in a scsi controller to your PCI controller on
the virt board or whatever else you desire.

But as Joel points out, VirtIO is likely to get you the best
performance and is the most convenient method.

-Christoffer

On Thu, Aug 7, 2014 at 6:51 PM, Mathew Li  wrote:

Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk to qemu-system-aarch64? For example IDE disk or SATA disk or
maybe as a SCSI disk?


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #6 from Marti Raudsepp  ---
(In reply to Alex Williamson from comment #5)
> What if you use vfio-pci instead of pci-assign?

I run into the dreaded error:
  vfio: error, group 9 is not viable, please ensure all devices within the
  iommu_group are bound to their vfio bus driver

There are some proposed workarounds on the web, like passing
vfio_iommu_type1.allow_unsafe_interrupts=1 or pci=realloc, but these seem to
change nothing for me.

So I tried adding all the PCI devices in the IOMMU group as passthrough devices
(including IDE, SMBus, audio and OHCI controllers). But then QEMU's SeaBIOS
gets so confused it can no longer find a hard drive to boot off.

But you're right. At least I can stop the non-functional virtual machine now,
so I've got that going for me, which is nice.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Christoffer Dall
Currently we only model a virtual machine board (the -machine
type=virt parameter) which has a UART, a flash, an RTC, and a bunch of
virtio-mmio channels.

Once we either emulate a real aarch64 board (with whatever peripherals
it may have) or add a PCI controller to the virt board, then you can
choose whatever storage the real board has or start doing interesting
things like plugging in a scsi controller to your PCI controller on
the virt board or whatever else you desire.

But as Joel points out, VirtIO is likely to get you the best
performance and is the most convenient method.

-Christoffer

On Thu, Aug 7, 2014 at 6:51 PM, Mathew Li  wrote:
> Great. VirtIO works for me. Thanks for your help folks!
>
> Is there any other way to add a virtual disk, more like a traditional
> disk to qemu-system-aarch64? For example IDE disk or SATA disk or
> maybe as a SCSI disk?
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Christoffer Dall
That's probably because you updated your guest kernel to one that
supports PSCI v0.2 and therefore ignores the incorrect function IDs in
the DT (as it should).

I've sent a fix to qemu-devel@ today:
http://lists.gnu.org/archive/html/qemu-devel/2014-08/msg01179.html

On Wed, Aug 6, 2014 at 6:48 PM, Joel Schopp  wrote:
> It turns out that after a recent rebase of my kernel and qemu to the
> latest the problem is fixed.  Rather than hunt down what fixed it I'm
> just accepting the win and moving on. -smp 4 now works.
>
> -Joel
>
> On 08/06/2014 11:15 AM, Christoffer Dall wrote:
>> On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp  wrote:
>>> On 08/04/2014 07:35 PM, Mathew Li wrote:
 Hi,

 I have a quick question. How do we add a hard disk to the qemu ARM VM?

 I tried:

 qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
 -initrd initrd.img

 Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
 inside guest OS.
>>> I've been running something like this:
>>>
>>> qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
>>> tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
>>> virtio-net-device,netdev=t0,id=nic0 \
>>> -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs 
>>> -device virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append 
>>> "console=ttyAMA0 console=ttyS0 root=/dev/vda"
>>>
>>>
>>> On my system -smp 2 or higher hangs in the guest kernel.
>> The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up
>> patch to QEMU, you can try:
>> https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix
>>
>> [disclaimer: there may be a better fix somewhere on the qemu list, I
>> haven't kept track the last couple of days]
>>
>> -Christoffer
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Joel Schopp
virtio will get you the best performance so why would you want to use 
something slower?


-Joel

On 08/07/2014 11:51 AM, Mathew Li wrote:

Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk to qemu-system-aarch64? For example IDE disk or SATA disk or
maybe as a SCSI disk?

On Wed, Aug 6, 2014 at 9:48 AM, Joel Schopp  wrote:

It turns out that after a recent rebase of my kernel and qemu to the
latest the problem is fixed.  Rather than hunt down what fixed it I'm
just accepting the win and moving on. -smp 4 now works.

-Joel

On 08/06/2014 11:15 AM, Christoffer Dall wrote:

On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp  wrote:

On 08/04/2014 07:35 PM, Mathew Li wrote:

Hi,

I have a quick question. How do we add a hard disk to the qemu ARM VM?

I tried:

qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd initrd.img

qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd initrd.img

qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
-initrd initrd.img

Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
inside guest OS.

I've been running something like this:

qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
virtio-net-device,netdev=t0,id=nic0 \
-kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs -device 
virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append "console=ttyAMA0 
console=ttyS0 root=/dev/vda"


On my system -smp 2 or higher hangs in the guest kernel.

The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up
patch to QEMU, you can try:
https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix

[disclaimer: there may be a better fix somewhere on the qemu list, I
haven't kept track the last couple of days]

-Christoffer


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: KVM on ARM64

2014-08-07 Thread Mathew Li
Great. VirtIO works for me. Thanks for your help folks!

Is there any other way to add a virtual disk, more like a traditional
disk to qemu-system-aarch64? For example IDE disk or SATA disk or
maybe as a SCSI disk?

On Wed, Aug 6, 2014 at 9:48 AM, Joel Schopp  wrote:
> It turns out that after a recent rebase of my kernel and qemu to the
> latest the problem is fixed.  Rather than hunt down what fixed it I'm
> just accepting the win and moving on. -smp 4 now works.
>
> -Joel
>
> On 08/06/2014 11:15 AM, Christoffer Dall wrote:
>> On Tue, Aug 5, 2014 at 4:18 PM, Joel Schopp  wrote:
>>> On 08/04/2014 07:35 PM, Mathew Li wrote:
 Hi,

 I have a quick question. How do we add a hard disk to the qemu ARM VM?

 I tried:

 qemu-system-aarch64 -machine virt -hda disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -sd disk.img -kernel image -initrd 
 initrd.img

 qemu-system-aarch64 -machine virt -mtdblock disk.img -kernel image
 -initrd initrd.img

 Nothing seems to work. I am not able to see any disk (i.e. dev/sdX)
 inside guest OS.
>>> I've been running something like this:
>>>
>>> qemu-system-aarch64 -smp 1 --enable-kvm -nographic -netdev 
>>> tap,id=t0,ifname=tap0,script=no,downscript=no,vhost=on -device 
>>> virtio-net-device,netdev=t0,id=nic0 \
>>> -kernel /extra/rootfs/boot/Image -drive file=/extra/rootfs.img,id=fs 
>>> -device virtio-blk-device,drive=fs -m 512 -M virt -cpu host -append 
>>> "console=ttyAMA0 console=ttyS0 root=/dev/vda"
>>>
>>>
>>> On my system -smp 2 or higher hangs in the guest kernel.
>> The -smp 2 hang issue is probably due to a missing PSCI v0.2 follow-up
>> patch to QEMU, you can try:
>> https://git.linaro.org/people/christoffer.dall/qemu-arm.git/shortlog/refs/heads/psci2-smp-fix
>>
>> [disclaimer: there may be a better fix somewhere on the qemu list, I
>> haven't kept track the last couple of days]
>>
>> -Christoffer
>
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

Alex Williamson  changed:

   What|Removed |Added

 CC||alex.william...@redhat.com

--- Comment #5 from Alex Williamson  ---
What if you use vfio-pci instead of pci-assign?  The BUG happens when the
kernel tries to detach a device from the domain, but the device doesn't
actually belong to a domain.  VFIO likely already avoids this because the
bridge and device will both be in the same IOMMU group and therefore attached
to the same domain.

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

Marti Raudsepp  changed:

   What|Removed |Added

 Kernel Version|3.13 (Ubuntu:   |3.16.0 (originally Ubuntu
   |3.13.0-32-generic)  |3.13.0-32-generic)

--- Comment #4 from Marti Raudsepp  ---
Also occurs with freshly built mainline kernel version 3.16.0.

[   87.327457] [ cut here ]
[   87.327488] kernel BUG at drivers/iommu/amd_iommu.c:2382!
[   87.327505] invalid opcode:  [#1] SMP 
[   87.327526] Modules linked in: pci_stub(E) ipt_MASQUERADE(E) iptable_nat(E)
nf_nat_ipv4(E) nf_nat(E) nf_conntrack_ipv4(E) nf_defrag_ipv4(E) xt_conntrack(E)
nf_conntrack(E) ipt_REJECT(E) xt_CHECKSUM(E) iptable_mangle(E) xt_tcpudp(E)
bridge(E) stp(E) llc(E) ip6table_filter(E) ip6_tables(E) iptable_filter(E)
ip_tables(E) ebtable_nat(E) ebtables(E) x_tables(E) nct6775(E) hwmon_vid(E)
radeon(E) kvm_amd(E) kvm(E) snd_hda_codec_realtek(E) snd_hda_codec_generic(E)
snd_hda_codec_hdmi(E) snd_hda_intel(E) snd_hda_controller(E) snd_hda_codec(E)
i2c_algo_bit(E) crct10dif_pclmul(E) drm_kms_helper(E) crc32_pclmul(E)
ghash_clmulni_intel(E) snd_hwdep(E) aesni_intel(E) snd_pcm(E) ttm(E)
aes_x86_64(E) glue_helper(E) netconsole(E) drm(E) lrw(E) snd_timer(E)
configfs(E) snd(E) gf128mul(E) ablk_helper(E) cryptd(E) soundcore(E) lp(E)
serio_raw(E) k10temp(E) i2c_piix4(E) mac_hid(E) video(E) parport(E)
usb_storage(E) pata_acpi(E) hid_generic(E) usbhid(E) hid(E) alx(E) psmouse(E)
mdio(E) pata_atiixp(E) ahci(E) libahci(E)
[   87.327963] CPU: 0 PID: 1452 Comm: qemu-system-x86 Tainted: GE
3.16.0 #1
[   87.327986] Hardware name: To Be Filled By O.E.M. To Be Filled By
O.E.M./FM2A88X Extreme6+, BIOS L3.16 04/16/2014
[   87.328016] task: 880427a18000 ti: 88042128 task.ti:
88042128
[   87.328039] RIP: 0010:[]  []
__detach_device+0xad/0xb0
[   87.328071] RSP: 0018:880421283b38  EFLAGS: 00010046
[   87.328088] RAX:  RBX: 8804286e5240 RCX:
880421283ae0
[   87.328110] RDX: dead00100100 RSI: 0086 RDI:
8804286e5240
[   87.328132] RBP: 880421283b58 R08: 0046 R09:
8804299b8900
[   87.328154] R10: 8800 R11: 000ff000 R12:

[   87.328175] R13: 88042127a610 R14: 88042744c040 R15:
8804286e5240
[   87.328197] FS:  7f1d03857700() GS:88043ec0()
knlGS:
[   87.328221] CS:  0010 DS:  ES:  CR0: 80050033
[   87.328239] CR2: 7f1d03dc63a0 CR3: 01c13000 CR4:
000407f0
[   87.328260] Stack:
[   87.328268]  dead00100100 88042127a600 88042127a610
88042744c040
[   87.328299]  880421283b98 81605a7e 0202
88042744c040
[   87.328333]  88042744c040 880420a3c008 8804242b0a80
88007786dfd8
[   87.328365] Call Trace:
[   87.328378]  [] amd_iommu_domain_destroy+0x9e/0x160
[   87.328400]  [] iommu_domain_free+0x1b/0x30
[   87.328432]  [] kvm_iommu_unmap_guest+0x53/0x60 [kvm]
[   87.328461]  [] kvm_arch_destroy_vm+0x39/0x1f0 [kvm]
[   87.328484]  [] ? synchronize_srcu+0x1d/0x20
[   87.328509]  [] kvm_put_kvm+0x10e/0x220 [kvm]
[   87.328535]  [] kvm_vcpu_release+0x18/0x20 [kvm]
[   87.328556]  [] __fput+0xe4/0x220
[   87.328573]  [] fput+0xe/0x10
[   87.328591]  [] task_work_run+0xc4/0xe0
[   87.328609]  [] do_exit+0x2b8/0xa60
[   87.328627]  [] do_group_exit+0x3f/0xa0
[   87.328645]  [] get_signal_to_deliver+0x1d0/0x6f0
[   87.328668]  [] do_signal+0x48/0x9d0
[   87.328687]  [] ? acct_account_cputime+0x1c/0x20
[   87.328708]  [] ? account_user_time+0x8b/0xa0
[   87.329791]  [] ? vtime_account_user+0x54/0x60
[   87.330869]  [] do_notify_resume+0x69/0xb0
[   87.331950]  [] int_signal+0x12/0x17
[   87.333016] Code: fe ff ff eb b8 66 0f 1f 84 00 00 00 00 00 48 8b 35 69 b0
9a 00 49 39 f4 74 c1 48 89 df e8 8c fd ff ff 5b 41 5c 41 5d 41 5e 5d c3 <0f> 0b
90 66 66 66 66 90 55 48 89 e5 41 57 41 56 49 89 fe 41 55 
[   87.335373] RIP  [] __detach_device+0xad/0xb0
[   87.336475]  RSP 
[   87.337562] ---[ end trace bee5733468f37c81 ]---

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: pci passthrough error about insufficient iommu width

2014-08-07 Thread Alex Williamson
On Wed, 2014-08-06 at 23:12 -0700, Nishank Trivedi wrote:
> On 8/6/14, 2:49 PM, William Tu wrote:
> >>
> >> Try vfio-pci instead of pci-assign
> >>
> 
> Thanks Alex, William. Using vfio worked.
> So does that mean pci-assign is being deprecated?

Yes, vfio is meant to replace pci-assign with a better device ownership
model, better security, better separation from KVM, support for multiple
architectures, etc.  There are no concrete plans to remove pci-assign,
but as you can see, it's not as well tested or maintained as vfio.
Thanks,

Alex

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #3 from Marti Raudsepp  ---
Created attachment 145441
  --> https://bugzilla.kernel.org/attachment.cgi?id=145441&action=edit
dmidecode.txt

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #2 from Marti Raudsepp  ---
Created attachment 145431
  --> https://bugzilla.kernel.org/attachment.cgi?id=145431&action=edit
startup_dmesg.txt

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Bug 81841] New: amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

Bug ID: 81841
   Summary: amd-iommu: kernel BUG & lockup after shutting down KVM
guest using PCI passthrough/PCIe bridge
   Product: Virtualization
   Version: unspecified
Kernel Version: 3.13 (Ubuntu: 3.13.0-32-generic)
  Hardware: All
OS: Linux
  Tree: Mainline
Status: NEW
  Severity: normal
  Priority: P1
 Component: kvm
  Assignee: virtualization_...@kernel-bugs.osdl.org
  Reporter: ma...@juffo.org
Regression: No

I have a Windows XP virtual machine in libvirt and I'm trying to use PCI
passthrough to provide access to a legacy Dialogic ISDN card (:00:05.0).
Since it's an old PCI device, there's also a PCIe-to-PCI bridge (:00:14.4).
With some manual tinkering, the virtual machine starts up and passthrough works
fine, but when I stop or shut down the virtual machine, I immediately get an
oops in dmesg and after some time passes, the whole machine freezes.

I'm using the ASRock FM2A88X Extreme6+ motherboard, tried with the latest BIOS
version 2.90 as well as beta version L3.16. AMD A10-7850K processor.

The same symptoms have also been reported before:
* 3.2.0: http://permalink.gmane.org/gmane.comp.emulators.kvm.devel/85138
* 3.0.6: https://www.mail-archive.com/kvm@vger.kernel.org/msg64854.html
* 2.6.37-rc6: http://marc.info/?l=kvm&m=129867567106942 - slightly different
traceback

In order for the VM to successfully start up, I need to run the following
commands manually, to bind the PCI bridge to pci-stub and then unbind:

modprobe pci-stub
echo '1022 780f' > /sys/bus/pci/drivers/pci-stub/new_id
echo :00:14.4 > /sys/bus/pci/drivers/pci-stub/bind
echo :00:14.4 > /sys/bus/pci/drivers/pci-stub/unbind
echo '1022 780f' > /sys/bus/pci/drivers/pci-stub/remove_id

(If I don't do this, I get the kernel message:
pci-stub :01:05.0: kvm assign device failed ret -16)

lspci -vt
-[:00]-+-00.0  Advanced Micro Devices, Inc. [AMD] Device 1422
   +-00.2  Advanced Micro Devices, Inc. [AMD] Device 1423
   +-01.0  Advanced Micro Devices, Inc. [AMD/ATI] Kaveri [Radeon R7 200
Series]
   +-01.1  Advanced Micro Devices, Inc. [AMD/ATI] Device 1308
   +-02.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-03.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-04.0  Advanced Micro Devices, Inc. [AMD] Device 1424
   +-10.0  Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller
   +-10.1  Advanced Micro Devices, Inc. [AMD] FCH USB XHCI Controller
   +-11.0  Advanced Micro Devices, Inc. [AMD] FCH SATA Controller [AHCI
mode]
   +-12.0  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-12.2  Advanced Micro Devices, Inc. [AMD] FCH USB EHCI Controller
   +-13.0  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-13.2  Advanced Micro Devices, Inc. [AMD] FCH USB EHCI Controller
   +-14.0  Advanced Micro Devices, Inc. [AMD] FCH SMBus Controller
   +-14.1  Advanced Micro Devices, Inc. [AMD] FCH IDE Controller
   +-14.2  Advanced Micro Devices, Inc. [AMD] FCH Azalia Controller
   +-14.3  Advanced Micro Devices, Inc. [AMD] FCH LPC Bridge
   +-14.4-[01]05.0  Dialogic Corporation PRI
   +-14.5  Advanced Micro Devices, Inc. [AMD] FCH USB OHCI Controller
   +-15.0-[02]--
   +-15.2-[03]00.0  ASMedia Technology Inc. ASM1042 SuperSpeed USB
Host Controller
   +-15.3-[04]00.0  Qualcomm Atheros QCA8171 Gigabit Ethernet
   +-18.0  Advanced Micro Devices, Inc. [AMD] Device 141a
   +-18.1  Advanced Micro Devices, Inc. [AMD] Device 141b
   +-18.2  Advanced Micro Devices, Inc. [AMD] Device 141c
   +-18.3  Advanced Micro Devices, Inc. [AMD] Device 141d
   +-18.4  Advanced Micro Devices, Inc. [AMD] Device 141e
   \-18.5  Advanced Micro Devices, Inc. [AMD] Device 141f

After shutting down, I get lots of oops messages; these are captured via
netconsole.

[ 1949.942276] [ cut here ]
[ 1949.942311] kernel BUG at
/build/buildd/linux-3.13.0/drivers/iommu/amd_iommu.c:2382!
[ 1949.942342] invalid opcode:  [#1] SMP
[ 1949.942359] Modules linked in: pci_stub ipt_MASQUERADE iptable_nat
nf_nat_ipv4 nf_nat nf_conntrack_ipv4 nf_defrag_ipv4 xt_conntrack nf_conntrack
ipt_REJECT xt_CHECKSUM iptable_mangle xt_tcpudp bridge stp llc ip6table_filter
ip6_tables iptable_filter ip_tables ebtable_nat ebtables x_tables nct6775
hwmon_vid snd_hda_codec_realtek netconsole kvm_amd snd_timer drm_kms_helper snd
drm soundcore mac_hid i2c_algo_bit[ 1949.942716] Hardware name: To Be Filled By
O.E.M. To Be Filled By O.E.M./FM2A88X Extreme6+, BIOS L3.16 04/16/2014
[ 1949.942745] task: 8804284497f0 ti: 8800361a2000 task.ti:
8800361a2000
[ 1949.942767] RIP: 0

[Bug 81841] amd-iommu: kernel BUG & lockup after shutting down KVM guest using PCI passthrough/PCIe bridge

2014-08-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=81841

--- Comment #1 from Marti Raudsepp  ---
Created attachment 145421
  --> https://bugzilla.kernel.org/attachment.cgi?id=145421&action=edit
crash_netconsole.txt

-- 
You are receiving this mail because:
You are watching the assignee of the bug.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] KVM: nVMX: nested TPR shadow/threshold emulation

2014-08-07 Thread Paolo Bonzini
Il 06/08/2014 08:38, Zhang, Yang Z ha scritto:
> Paolo Bonzini wrote on 2014-08-05:
>> Il 05/08/2014 09:56, Zhang, Yang Z ha scritto:
>>> Wanpeng Li wrote on 2014-08-04:
 This patch fix bug
 https://bugzilla.kernel.org/show_bug.cgi?id=61411

 TPR shadow/threshold feature is important to speed up the Windows guest.
 Besides, it is a must feature for certain VMM.

 We map virtual APIC page address and TPR threshold from L1 VMCS. If
 TPR_BELOW_THRESHOLD VM exit is triggered by L2 guest and L1
 interested in, we inject it into L1 VMM for handling.

 Signed-off-by: Wanpeng Li 
 ---
 v2 -> v3:
  * nested vm entry failure if both tpr shadow and cr8 exiting bits
 are not set
 v1 -> v2:
  * don't take L0's "virtualize APIC accesses" setting into account *
  virtual_apic_page do exactly the same thing that is done for
  apic_access_page * add the tpr threshold field to the read-write
  fields for shadow
 VMCS

  arch/x86/kvm/vmx.c | 38 --
  1 file changed, 36 insertions(+), 2 deletions(-)
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index
 c604f3c..7a56e2c 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -379,6 +379,7 @@ struct nested_vmx {
 * we must keep them pinned while L2 runs.   */ struct page
  *apic_access_page; +  struct page *virtual_apic_page; u64
  msr_ia32_feature_control;
  
struct hrtimer preemption_timer; @@ -533,6 +534,7 @@ static int
  max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields);
  
  static unsigned long shadow_read_write_fields[] = { + TPR_THRESHOLD,
GUEST_RIP,  GUEST_RSP,  GUEST_CR0,
 @@ -2330,7 +2332,7 @@ static __init void
 nested_vmx_setup_ctls_msrs(void)
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_EXITING |
CPU_BASED_RDPMC_EXITING | CPU_BASED_RDTSC_EXITING |
 -  CPU_BASED_PAUSE_EXITING |
 +  CPU_BASED_PAUSE_EXITING | CPU_BASED_TPR_SHADOW |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;  /*   * We can allow 
 some
  features even when not supported by the @@ -6148,6 +6150,10 @@ static
  void free_nested(struct vcpu_vmx *vmx)
nested_release_page(vmx->nested.apic_access_page);
vmx->nested.apic_access_page = 0;   }
 +  if (vmx->nested.virtual_apic_page) {
 +  nested_release_page(vmx->nested.virtual_apic_page);
 +  vmx->nested.virtual_apic_page = 0;
 +  }

nested_free_all_saved_vmcss(vmx);  } @@ -6936,7 +6942,7 @@ static
  bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)   case
  EXIT_REASON_MCE_DURING_VMENTRY:   return 0;   case
  EXIT_REASON_TPR_BELOW_THRESHOLD:
 -  return 1;
 +  return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
case EXIT_REASON_APIC_ACCESS:
return nested_cpu_has2(vmcs12,
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
 @@ -7057,6 +7063,9 @@ static int vmx_handle_exit(struct kvm_vcpu
 *vcpu)

  static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr,
 int
 irr)  {
 +  if (is_guest_mode(vcpu))
 +  return;
 +
if (irr == -1 || tpr < irr) {
vmcs_write32(TPR_THRESHOLD, 0);
return;
 @@ -8024,6 +8033,27 @@ static void prepare_vmcs02(struct kvm_vcpu
 *vcpu, struct vmcs12 *vmcs12)
exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING;
exec_control &= ~CPU_BASED_TPR_SHADOW;
exec_control |= vmcs12->cpu_based_vm_exec_control;
 +
 +  if (exec_control & CPU_BASED_TPR_SHADOW) {
 +  if (vmx->nested.virtual_apic_page)
 +  nested_release_page(vmx->nested.virtual_apic_page);
 +  vmx->nested.virtual_apic_page =
 + nested_get_page(vcpu, vmcs12->virtual_apic_page_addr);
 +  if (!vmx->nested.virtual_apic_page)
 +  exec_control &=
 +  ~CPU_BASED_TPR_SHADOW;
 +  else
 +  vmcs_write64(VIRTUAL_APIC_PAGE_ADDR,
 +  page_to_phys(vmx->nested.virtual_apic_page));
 +
 +  if (!(exec_control & CPU_BASED_TPR_SHADOW) &&
 +  !((exec_control & CPU_BASED_CR8_LOAD_EXITING) &&
 +  (exec_control & CPU_BASED_CR8_STORE_EXITING)))
 +  nested_vmx_failValid(vcpu,
 VMXERR_ENTRY_INVALID_CONTROL_FIELD);
>>>
>>> I think this is not correct. The vmx->nested.virtual_apic_page may not
>>> be valid due to two reasons: 1. The virtual_apic_page_addr is not a valid
>>> gfn. In this case, the vmx failure
>> must be injected to L1 unconditional

Re: [PATCH/RFC] KVM: track pid for VCPU only on KVM_RUN ioctl

2014-08-07 Thread Paolo Bonzini
Il 07/08/2014 11:59, Christian Borntraeger ha scritto:
> Paolo,
> 
> are you willing to apply to kvm/queue?

I asked a question, but anyway... not until the end of the merge window
and my small vacation. :)

Paolo
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC] KVM: track pid for VCPU only on KVM_RUN ioctl

2014-08-07 Thread Paolo Bonzini
Il 05/08/2014 16:44, Christian Borntraeger ha scritto:
> We currently track the pid of the task that runs the VCPU in
> vcpu_load. Since we call vcpu_load for all kind of ioctls on a
> CPU, this causes hiccups due to synchronize_rcu if one CPU is
> modified by another CPU or the main thread (e.g. initialization,
> reset). We track the pid only for the purpose of yielding, so
> let's update the pid only in the KVM_RUN ioctl.
> 
> In addition, don't do a synchronize_rcu on startup (pid == 0).

Speaking of QEMU, most ioctls should run from the VCPU anyway.  Which
ioctls do you see called from elsewhere?  What speedup can you see if
you just do the "no synchronize_rcu on pid == 0" part?

The patch may be okay, but I'm worried that it might be hiding a bug in
QEMU.

Paolo

> This speeds up guest boot time on s390 noticeably for some configs, e.g.
> HZ=100, no full state tracking, 64 guest cpus 32 host cpus.

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Query: Is it possible to lose interrupts between vhost and virtio_net during migration?

2014-08-07 Thread Zhangjie (HZ)

On 2014/8/5 20:14, Zhangjie (HZ) wrote:
> On 2014/8/5 17:49, Michael S. Tsirkin wrote:
>> On Tue, Aug 05, 2014 at 02:29:28PM +0800, Zhangjie (HZ) wrote:
>>> Jason is right, the new order is not the cause of network unreachable.
>>> Changing the order does not seem to work. After about 40 times, the problem occurs 
>>> again.
>>> Maybe there are other hidden reasons for that.
> I modified the code to change the order myself yesterday.
> This result is about my code.
>>
>> To make sure, you tested the patch that I posted to list:
>> "vhost_net: stop guest notifiers after backend"?
>>
>> Please confirm.
>>
> OK, I will test with your patch "vhost_net: stop guest notifiers after 
> backend".
> 
Unfortunately, after using the patch "vhost_net: stop guest notifiers after 
backend",
Linux VMs stopped themselves a few minutes after they were started.
>@@ -308,6 +308,12 @@ int vhost_net_start(VirtIODevice *dev, NetClientState 
>*ncs,
> goto err;
> }
>
>+r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
>+if (r < 0) {
>+error_report("Error binding guest notifier: %d", -r);
>+goto err;
>+}
>+
> for (i = 0; i < total_queues; i++) {
> r = vhost_net_start_one(get_vhost_net(ncs[i].peer), dev, i * 2);
>
>@@ -316,12 +322,6 @@ int vhost_net_start(VirtIODevice *dev, NetClientState 
>*ncs,
> }
> }
>
>-r = k->set_guest_notifiers(qbus->parent, total_queues * 2, true);
>-if (r < 0) {
>-error_report("Error binding guest notifier: %d", -r);
>-goto err;
>-}
>-
> return 0;
I wonder if k->set_guest_notifiers should be called after "hdev->started = 
true;" in vhost_dev_start.
-- 
Best Wishes!
Zhang Jie

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[GIT PULL] Second round of KVM changes for 3.17

2014-08-07 Thread Paolo Bonzini
Linus,

The following changes since commit 5167d09ffad5b16b574d35ce3047ed34caf1e837:

  Merge tag 'arm64-upstream' of 
git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux (2014-08-04 12:31:53 
-0700)

are available in the git repository at:


  git://git.kernel.org/pub/scm/virt/kvm/kvm.git tags/for-linus

for you to fetch changes up to c77dcacb397519b6ade8f08201a4a90a7f4f751e:

  KVM: Move more code under CONFIG_HAVE_KVM_IRQFD (2014-08-06 14:24:47 +0200)


Here are the PPC and ARM changes for KVM, which I separated because
they had small conflicts (respectively within KVM documentation,
and with 3.16-rc changes).  Since they were all within the subsystem,
I took care of them.

Stephen Rothwell reported some snags in PPC builds, but they are all
fixed now; the latest linux-next report was clean.

New features for ARM include:
- KVM VGIC v2 emulation on GICv3 hardware
- Big-Endian support for arm/arm64 (guest and host)
- Debug Architecture support for arm64 (arm32 is on Christoffer's todo list)

And for PPC:
- Book3S: Good number of LE host fixes, enable HV on LE
- Book3S HV: Add in-guest debug support

This release drops support for KVM on the PPC440.  As a result, the
PPC merge removes more lines than it adds. :)

I also included an x86 change, since Davidlohr tied it to an independent
bug report and the reporter quickly provided a Tested-by; there was no
reason to wait for -rc2.


Alex Bennée (2):
  arm64: KVM: export demux regids as KVM_REG_ARM64
  arm64: KVM: allow export and import of generic timer regs

Alexander Graf (31):
  KVM: PPC: Book3s PR: Disable AIL mode with OPAL
  KVM: PPC: Book3s HV: Fix tlbie compile error
  KVM: PPC: Book3S PR: Handle hyp doorbell exits
  KVM: PPC: Book3S PR: Fix ABIv2 on LE
  KVM: PPC: Book3S PR: Fix sparse endian checks
  PPC: Add asm helpers for BE 32bit load/store
  KVM: PPC: Book3S HV: Make HTAB code LE host aware
  KVM: PPC: Book3S HV: Access guest VPA in BE
  KVM: PPC: Book3S HV: Access host lppaca and shadow slb in BE
  KVM: PPC: Book3S HV: Access XICS in BE
  KVM: PPC: Book3S HV: Fix ABIv2 on LE
  KVM: PPC: Book3S HV: Enable for little endian hosts
  KVM: PPC: Book3S: Move vcore definition to end of kvm_arch struct
  KVM: PPC: Deflect page write faults properly in kvmppc_st
  KVM: PPC: Book3S: Stop PTE lookup on write errors
  KVM: PPC: Book3S: Add hack for split real mode
  KVM: PPC: Book3S: Make magic page properly 4k mappable
  KVM: PPC: Remove 440 support
  KVM: Rename and add argument to check_extension
  KVM: Allow KVM_CHECK_EXTENSION on the vm fd
  KVM: PPC: Book3S: Provide different CAPs based on HV or PR mode
  KVM: PPC: Implement kvmppc_xlate for all targets
  KVM: PPC: Move kvmppc_ld/st to common code
  KVM: PPC: Remove kvmppc_bad_hva()
  KVM: PPC: Use kvm_read_guest in kvmppc_ld
  KVM: PPC: Handle magic page in kvmppc_ld/st
  KVM: PPC: Separate loadstore emulation from priv emulation
  KVM: PPC: Expose helper functions for data/inst faults
  KVM: PPC: Remove DCR handling
  KVM: PPC: HV: Remove generic instruction emulation
  KVM: PPC: PR: Handle FSCR feature deselects

Alexey Kardashevskiy (1):
  KVM: PPC: Book3S: Fix LPCR one_reg interface

Aneesh Kumar K.V (4):
  KVM: PPC: BOOK3S: PR: Fix PURR and SPURR emulation
  KVM: PPC: BOOK3S: PR: Emulate virtual timebase register
  KVM: PPC: BOOK3S: PR: Emulate instruction counter
  KVM: PPC: BOOK3S: HV: Update compute_tlbie_rb to handle 16MB base page

Anton Blanchard (2):
  KVM: PPC: Book3S HV: Fix ABIv2 indirect branch issue
  KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC()

Bharat Bhushan (10):
  kvm: ppc: bookehv: Added wrapper macros for shadow registers
  kvm: ppc: booke: Use the shared struct helpers of SRR0 and SRR1
  kvm: ppc: booke: Use the shared struct helpers of SPRN_DEAR
  kvm: ppc: booke: Add shared struct helpers of SPRN_ESR
  kvm: ppc: booke: Use the shared struct helpers for SPRN_SPRG0-7
  kvm: ppc: Add SPRN_EPR get helper function
  kvm: ppc: bookehv: Save restore SPRN_SPRG9 on guest entry exit
  KVM: PPC: Booke-hv: Add one reg interface for SPRG9
  KVM: PPC: Remove comment saying SPRG1 is used for vcpu pointer
  KVM: PPC: BOOKEHV: rename e500hv_spr to bookehv_spr

Christoffer Dall (2):
  arm/arm64: KVM: Fix and refactor unmap_range
  Merge tag 'deps-irqchip-gic-3.17' of 
git://git.infradead.org/users/jcooper/linux.git

Eric Auger (1):
  ARM: KVM: Unmap IPA on memslot delete/move

Kim Phillips (1):
  ARM: KVM: user_mem_abort: support stage 2 MMIO page mapping

Li Liu (1):
  ARM: virt: fix wrong HSCTLR.EE bit setting

Marc Zyngier (31):
  arm64: GICv3 device tree binding documentation
  arm64: boot protocol documen

Re: [PATCH/RFC] KVM: track pid for VCPU only on KVM_RUN ioctl

2014-08-07 Thread Christian Borntraeger
On 07/08/14 10:21, Raghavendra K T wrote:
> On 08/05/2014 08:14 PM, Christian Borntraeger wrote:
>> We currently track the pid of the task that runs the VCPU in
>> vcpu_load. Since we call vcpu_load for all kind of ioctls on a
>> CPU, this causes hiccups due to synchronize_rcu if one CPU is
>> modified by another CPU or the main thread (e.g. initialization,
>> reset). We track the pid only for the purpose of yielding, so
>> let's update the pid only in the KVM_RUN ioctl.
>>
>> In addition, don't do a synchronize_rcu on startup (pid == 0).
>>
>> This speeds up guest boot time on s390 noticeably for some configs, e.g.
>> HZ=100, no full state tracking, 64 guest cpus 32 host cpus.
>>
>> Signed-off-by: Christian Borntraeger 
>> CC: Rik van Riel 
>> CC: Raghavendra K T 
>> CC: Michael Mueller 
>> ---
> 
> Please feel free to add
> Reviewed-by: Raghavendra K T 
> 
> I could see very small improvement while testing 32 vcpu guest booting
> on x86 (16 pcpu host +ht).
> 
> I was just wondering whether somebody implementing vcpu hot plug would
> have to bother about this change, but could not see any. What do you
> think?

The yield code can handle pid == 0, so the new CPU won't be a yield candidate 
until it is run for the first time. So I guess this is ok.

Paolo,

are you willing to apply to kvm/queue?


Christian

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/6] IRQFD without IRQ routing, enabled for XICS

2014-08-07 Thread Paul Mackerras
On Wed, Aug 06, 2014 at 02:35:29PM +0200, Paolo Bonzini wrote:

> This doesn't build without CONFIG_KVM_XICS, so I had to apply a fixup
> patch to move some code from being under CONFIG_HAVE_KVM_IRQCHIP or
> unconditional, to being conditional on CONFIG_HAVE_KVM_IRQFD.  I'll send
> the patch shortly.

Oops, sorry, thanks for fixing it up.

> Also, please look into adding a trace_kvm_set_irq call to kvm_set_irq in
> arch/powerpc/kvm/book3s_xics.c.
> 
> I would have preferred these snags to be fixed before (especially since
> the breakage is simply with "make ppc64_defconfig"), but it's partly my
> fault for delaying the review for so long.  And the series is good work

... and I have been on vacation for the past three weeks, which didn't
help either.

> and a very nice cleanup of the irqfd architecture.
> 
> Assuming that the linux-next merge from tomorrow is fine, I'll send the
> second pull request to Linus.

Thanks,
Paul.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] kvmtool: ARM64: Fix compile error for aarch64

2014-08-07 Thread Will Deacon
On Thu, Aug 07, 2014 at 09:50:52AM +0100, Anup Patel wrote:
> On 6 August 2014 18:20, Will Deacon  wrote:
> > On Tue, Aug 05, 2014 at 09:49:56AM +0100, Anup Patel wrote:
> >> The __ARM64_SYS_REG() macro is already defined in uapi/asm/kvm.h
> >> of Linux-3.16-rcX hence remove it from arm/aarch64/kvm-cpu.c
> >
> > I've been carrying a similar patch in my kvmtool/arm branch, but upstream
> > kvmtool is still based on 3.13, so this isn't needed at the moment.
> >
> > Do you have a need for Pekka to merge in the latest kernel sources?
> >
> > Will
> 
> Yes, we should syncup KVMTOOL with latest kernel sources.
> 
> I want to be able to shutdown VM when using KVMTOOL. To do
> this we need to use PSCI-v0.2 from Guest kernel.

Ok. Pekka, could you merge in 3.16 to the kvmtool master branch please?
You'll need my patch below to resolve some ARM build fallout.

Cheers,

Will

--->8

>From e780359998667e3eb08d9d002398618a06a861b9 Mon Sep 17 00:00:00 2001
From: Will Deacon 
Date: Tue, 13 May 2014 12:06:06 +0100
Subject: [PATCH] kvm tools: arm: remove register accessor macros now that they
 are in uapi

The kernel now exposes register accessor macros in the uapi/ headers
for arm and arm64, so use those instead (and avoid the compile failure
from the duplicate definitions).

Signed-off-by: Will Deacon 
---
 tools/kvm/arm/aarch32/kvm-cpu.c | 15 +--
 tools/kvm/arm/aarch64/kvm-cpu.c | 15 ---
 2 files changed, 1 insertion(+), 29 deletions(-)

diff --git a/tools/kvm/arm/aarch32/kvm-cpu.c b/tools/kvm/arm/aarch32/kvm-cpu.c
index 464b473dc936..95fb1da5ba3d 100644
--- a/tools/kvm/arm/aarch32/kvm-cpu.c
+++ b/tools/kvm/arm/aarch32/kvm-cpu.c
@@ -7,25 +7,12 @@
 #define ARM_CORE_REG(x)(KVM_REG_ARM | KVM_REG_SIZE_U32 | 
KVM_REG_ARM_CORE | \
 KVM_REG_ARM_CORE_REG(x))
 
-#define ARM_CP15_REG_SHIFT_MASK(x,n)   \
-   (((x) << KVM_REG_ARM_ ## n ## _SHIFT) & KVM_REG_ARM_ ## n ## _MASK)
-
-#define __ARM_CP15_REG(op1,crn,crm,op2)\
-   (KVM_REG_ARM | KVM_REG_SIZE_U32 |   \
-(15 << KVM_REG_ARM_COPROC_SHIFT)   |   \
-ARM_CP15_REG_SHIFT_MASK(op1, OPC1) |   \
-ARM_CP15_REG_SHIFT_MASK(crn, 32_CRN)   |   \
-ARM_CP15_REG_SHIFT_MASK(crm, CRM)  |   \
-ARM_CP15_REG_SHIFT_MASK(op2, 32_OPC2))
-
-#define ARM_CP15_REG(...)  __ARM_CP15_REG(__VA_ARGS__)
-
 unsigned long kvm_cpu__get_vcpu_mpidr(struct kvm_cpu *vcpu)
 {
struct kvm_one_reg reg;
u32 mpidr;
 
-   reg.id = ARM_CP15_REG(ARM_CPU_ID, ARM_CPU_ID_MPIDR);
+   reg.id = ARM_CP15_REG32(ARM_CPU_ID, ARM_CPU_ID_MPIDR);
reg.addr = (u64)(unsigned long)&mpidr;
if (ioctl(vcpu->vcpu_fd, KVM_GET_ONE_REG, &reg) < 0)
die("KVM_GET_ONE_REG failed (get_mpidr vcpu%ld", vcpu->cpu_id);
diff --git a/tools/kvm/arm/aarch64/kvm-cpu.c b/tools/kvm/arm/aarch64/kvm-cpu.c
index 71a2a3a7789d..1b293748efd6 100644
--- a/tools/kvm/arm/aarch64/kvm-cpu.c
+++ b/tools/kvm/arm/aarch64/kvm-cpu.c
@@ -15,21 +15,6 @@
 #define ARM64_CORE_REG(x)  (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
-#define ARM64_SYS_REG_SHIFT_MASK(x,n)  \
-   (((x) << KVM_REG_ARM64_SYSREG_ ## n ## _SHIFT) &\
-KVM_REG_ARM64_SYSREG_ ## n ## _MASK)
-
-#define __ARM64_SYS_REG(op0,op1,crn,crm,op2)   \
-   (KVM_REG_ARM64 | KVM_REG_SIZE_U64   |   \
-KVM_REG_ARM64_SYSREG   |   \
-ARM64_SYS_REG_SHIFT_MASK(op0, OP0) |   \
-ARM64_SYS_REG_SHIFT_MASK(op1, OP1) |   \
-ARM64_SYS_REG_SHIFT_MASK(crn, CRN) |   \
-ARM64_SYS_REG_SHIFT_MASK(crm, CRM) |   \
-ARM64_SYS_REG_SHIFT_MASK(op2, OP2))
-
-#define ARM64_SYS_REG(...) __ARM64_SYS_REG(__VA_ARGS__)
-
 unsigned long kvm_cpu__get_vcpu_mpidr(struct kvm_cpu *vcpu)
 {
struct kvm_one_reg reg;
-- 
2.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] kvmtool: ARM/ARM64: Provide PSCI-0.2 guest when in-kernel KVM supports it

2014-08-07 Thread Will Deacon
On Thu, Aug 07, 2014 at 10:00:13AM +0100, Anup Patel wrote:
> On 6 August 2014 18:26, Will Deacon  wrote:
> > On Tue, Aug 05, 2014 at 09:49:59AM +0100, Anup Patel wrote:
> >> If in-kernel KVM support PSCI-0.2 emulation then we should set
> >> KVM_ARM_VCPU_PSCI_0_2 feature for each guest VCPU and also
> >> provide "arm,psci-0.2","arm,psci" as PSCI compatible string.
> >>
> >> This patch updates kvm_cpu__arch_init() and setup_fdt() as
> >> per above.
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar 
> >> Signed-off-by: Anup Patel 
> >> ---
> >>  tools/kvm/arm/fdt.c |   39 +--
> >>  tools/kvm/arm/kvm-cpu.c |5 +
> >>  2 files changed, 38 insertions(+), 6 deletions(-)
> >
> > [...]
> >
> >> diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
> >> index 7478f8f..76c28a0 100644
> >> --- a/tools/kvm/arm/kvm-cpu.c
> >> +++ b/tools/kvm/arm/kvm-cpu.c
> >> @@ -74,6 +74,11 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, 
> >> unsigned long cpu_id)
> >>   die("preferred target not available\n");
> >>   }
> >>
> >> + /* Set KVM_ARM_VCPU_PSCI_0_2 if available */
> >> + if (kvm__supports_extension(kvm, KVM_CAP_ARM_PSCI_0_2)) {
> >> + vcpu_init.features[0] |= (1UL << KVM_ARM_VCPU_PSCI_0_2);
> >> + }
> >
> > Where is this used?
> 
> If we want to provide PSCI-0.2 to Guest then we should inform
> in-kernel KVM ARM/ARM64 using init features.
> 
> By default KVM ARM/ARM64 provides PSCI-0.1 to Guest. If we don't set
> this feature then Guest will get undefined exception for PSCI-0.2
> calls.

Gotcha, thanks for the explanation.

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 2/6] ARM64: perf: Re-enable overflow interrupt from interrupt handler

2014-08-07 Thread Will Deacon
On Thu, Aug 07, 2014 at 10:03:58AM +0100, Anup Patel wrote:
> On 6 August 2014 19:54, Will Deacon  wrote:
> > On Tue, Aug 05, 2014 at 10:24:11AM +0100, Anup Patel wrote:
> >> A hypervisor will typically mask the overflow interrupt before
> >> forwarding it to Guest Linux hence we need to re-enable the overflow
> >> interrupt after clearing it in Guest Linux. Also, this re-enabling
> >> of overflow interrupt does not harm in non-virtualized scenarios.
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar 
> >> Signed-off-by: Anup Patel 
> >> ---
> >>  arch/arm64/kernel/perf_event.c |8 
> >>  1 file changed, 8 insertions(+)
> >>
> >> diff --git a/arch/arm64/kernel/perf_event.c 
> >> b/arch/arm64/kernel/perf_event.c
> >> index 47dfb8b..19fb140 100644
> >> --- a/arch/arm64/kernel/perf_event.c
> >> +++ b/arch/arm64/kernel/perf_event.c
> >> @@ -1076,6 +1076,14 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, 
> >> void *dev)
> >>   if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
> >>   continue;
> >>
> >> + /*
> >> +  * If we are running under a hypervisor such as KVM then
> >> +  * hypervisor will mask the interrupt before forwarding
> >> +  * it to Guest Linux hence re-enable interrupt for the
> >> +  * overflowed counter.
> >> +  */
> >> + armv8pmu_enable_intens(idx);
> >> +
> >
> > Really? This is a giant bodge in the guest to work around short-comings in
> > the hypervisor. Why can't we fix this properly using something like Marc's
> > irq forwarding code?
> 
> This change is in accordance with our previous RFC thread about
> PMU virtualization where Marc Z had suggested doing an interrupt
> mask/unmask dance similar to arch-timer.
> 
> I have not tried Marc's irq forwarding series. In the next revision of this
> patchset, I will try to use Marc's irq forwarding approach.

That would be good. Judging by the colour Marc went when he saw this patch,
I don't think he intended you to hack perf in this way :)

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [RFC PATCH 2/6] ARM64: perf: Re-enable overflow interrupt from interrupt handler

2014-08-07 Thread Anup Patel
On 6 August 2014 19:54, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 10:24:11AM +0100, Anup Patel wrote:
>> A hypervisor will typically mask the overflow interrupt before
>> forwarding it to Guest Linux hence we need to re-enable the overflow
>> interrupt after clearing it in Guest Linux. Also, this re-enabling
>> of overflow interrupt does not harm in non-virtualized scenarios.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar 
>> Signed-off-by: Anup Patel 
>> ---
>>  arch/arm64/kernel/perf_event.c |8 
>>  1 file changed, 8 insertions(+)
>>
>> diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c
>> index 47dfb8b..19fb140 100644
>> --- a/arch/arm64/kernel/perf_event.c
>> +++ b/arch/arm64/kernel/perf_event.c
>> @@ -1076,6 +1076,14 @@ static irqreturn_t armv8pmu_handle_irq(int irq_num, 
>> void *dev)
>>   if (!armv8pmu_counter_has_overflowed(pmovsr, idx))
>>   continue;
>>
>> + /*
>> +  * If we are running under a hypervisor such as KVM then
>> +  * hypervisor will mask the interrupt before forwarding
>> +  * it to Guest Linux hence re-enable interrupt for the
>> +  * overflowed counter.
>> +  */
>> + armv8pmu_enable_intens(idx);
>> +
>
> Really? This is a giant bodge in the guest to work around short-comings in
> the hypervisor. Why can't we fix this properly using something like Marc's
> irq forwarding code?

This change is in accordance with our previous RFC thread about
PMU virtualization where Marc Z had suggested doing an interrupt
mask/unmask dance similar to arch-timer.

I have not tried Marc's irq forwarding series. In the next revision of this
patchset, I will try to use Marc's irq forwarding approach.

>
> Will

--
Anup
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] kvmtool: ARM64: Add target type potenza for aarch64

2014-08-07 Thread Will Deacon
On Thu, Aug 07, 2014 at 09:56:28AM +0100, Anup Patel wrote:
> On 6 August 2014 18:22, Will Deacon  wrote:
> > On Tue, Aug 05, 2014 at 09:49:57AM +0100, Anup Patel wrote:
> >> The VCPU target type KVM_ARM_TARGET_XGENE_POTENZA is available
> >> in latest Linux-3.16-rcX or higher hence register aarch64 target
> >> type for it.
> >>
> >> This patch enables us to run KVMTOOL on X-Gene Potenza host.
> >>
> >> Signed-off-by: Pranavkumar Sawargaonkar 
> >> Signed-off-by: Anup Patel 
> >> ---
> >>  tools/kvm/arm/aarch64/arm-cpu.c |9 -
> >>  1 file changed, 8 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/tools/kvm/arm/aarch64/arm-cpu.c 
> >> b/tools/kvm/arm/aarch64/arm-cpu.c
> >> index ce5ea2f..ce526e3 100644
> >> --- a/tools/kvm/arm/aarch64/arm-cpu.c
> >> +++ b/tools/kvm/arm/aarch64/arm-cpu.c
> >> @@ -41,10 +41,17 @@ static struct kvm_arm_target target_cortex_a57 = {
> >>   .init   = arm_cpu__vcpu_init,
> >>  };
> >>
> >> +static struct kvm_arm_target target_potenza = {
> >> + .id = KVM_ARM_TARGET_XGENE_POTENZA,
> >> + .compatible = "arm,arm-v8",
> >> + .init   = arm_cpu__vcpu_init,
> >> +};
> >
> > This implies you have the same PPIs for the arch-timer as the Cortex-A CPUs.
> > Is that right?
> 
> Currently, KVM ARM64 provides PPI27 as arch-timer IRQ for all target types.
> 
> This will have to change if KVM ARM64 starts using different
> arch-timer PPI based on target type.

Oh, of course, these are virtual interrupt numbers. Ignore me!

Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/5] kvmtool: ARM/ARM64: Provide PSCI-0.2 guest when in-kernel KVM supports it

2014-08-07 Thread Anup Patel
On 6 August 2014 18:26, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 09:49:59AM +0100, Anup Patel wrote:
>> If in-kernel KVM support PSCI-0.2 emulation then we should set
>> KVM_ARM_VCPU_PSCI_0_2 feature for each guest VCPU and also
>> provide "arm,psci-0.2","arm,psci" as PSCI compatible string.
>>
>> This patch updates kvm_cpu__arch_init() and setup_fdt() as
>> per above.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar 
>> Signed-off-by: Anup Patel 
>> ---
>>  tools/kvm/arm/fdt.c |   39 +--
>>  tools/kvm/arm/kvm-cpu.c |5 +
>>  2 files changed, 38 insertions(+), 6 deletions(-)
>
> [...]
>
>> diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
>> index 7478f8f..76c28a0 100644
>> --- a/tools/kvm/arm/kvm-cpu.c
>> +++ b/tools/kvm/arm/kvm-cpu.c
>> @@ -74,6 +74,11 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, 
>> unsigned long cpu_id)
>>   die("preferred target not available\n");
>>   }
>>
>> + /* Set KVM_ARM_VCPU_PSCI_0_2 if available */
>> + if (kvm__supports_extension(kvm, KVM_CAP_ARM_PSCI_0_2)) {
>> + vcpu_init.features[0] |= (1UL << KVM_ARM_VCPU_PSCI_0_2);
>> + }
>
> Where is this used?

If we want to provide PSCI-0.2 to Guest then we should inform
in-kernel KVM ARM/ARM64 using init features.

By default KVM ARM/ARM64 provides PSCI-0.1 to Guest. If we don't set
this feature then Guest will get undefined exception for PSCI-0.2
calls.

--
Anup

>
> Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] kvmtool: Handle exit reason KVM_EXIT_SYSTEM_EVENT

2014-08-07 Thread Anup Patel
On 6 August 2014 18:23, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 09:49:58AM +0100, Anup Patel wrote:
>> The KVM_EXIT_SYSTEM_EVENT exit reason was added to define
>> architecture independent system-wide events for a Guest.
>>
>> Currently, it is used by in-kernel PSCI-0.2 emulation of
>> KVM ARM/ARM64 to inform user space about PSCI SYSTEM_OFF
>> or PSCI SYSTEM_RESET request.
>>
>> For now, we simply treat all system-wide guest events as
>> same and shutdown the guest upon KVM_EXIT_SYSTEM_EVENT.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar 
>> Signed-off-by: Anup Patel 
>> ---
>>  tools/kvm/kvm-cpu.c |6 ++
>>  1 file changed, 6 insertions(+)
>>
>> diff --git a/tools/kvm/kvm-cpu.c b/tools/kvm/kvm-cpu.c
>> index ee0a8ec..e20ee4b 100644
>> --- a/tools/kvm/kvm-cpu.c
>> +++ b/tools/kvm/kvm-cpu.c
>> @@ -160,6 +160,12 @@ int kvm_cpu__start(struct kvm_cpu *cpu)
>>   goto exit_kvm;
>>   case KVM_EXIT_SHUTDOWN:
>>   goto exit_kvm;
>> + case KVM_EXIT_SYSTEM_EVENT:
>> + /*
>> +  * Treat both SHUTDOWN & RESET system events
>> +  * as shutdown request.
>> +  */
>> + goto exit_kvm;
>
> Can we figure out whether this was a SHUTDOWN or RESET request? If so,
> printing a message for the latter "RESET request received -- exiting KVM"
> might be informative.

OK, I will update this and make it more verbose.

--
Anup

>
> Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/5] kvmtool: ARM64: Add target type potenza for aarch64

2014-08-07 Thread Anup Patel
On 6 August 2014 18:22, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 09:49:57AM +0100, Anup Patel wrote:
>> The VCPU target type KVM_ARM_TARGET_XGENE_POTENZA is available
>> in latest Linux-3.16-rcX or higher hence register aarch64 target
>> type for it.
>>
>> This patch enables us to run KVMTOOL on X-Gene Potenza host.
>>
>> Signed-off-by: Pranavkumar Sawargaonkar 
>> Signed-off-by: Anup Patel 
>> ---
>>  tools/kvm/arm/aarch64/arm-cpu.c |9 -
>>  1 file changed, 8 insertions(+), 1 deletion(-)
>>
>> diff --git a/tools/kvm/arm/aarch64/arm-cpu.c 
>> b/tools/kvm/arm/aarch64/arm-cpu.c
>> index ce5ea2f..ce526e3 100644
>> --- a/tools/kvm/arm/aarch64/arm-cpu.c
>> +++ b/tools/kvm/arm/aarch64/arm-cpu.c
>> @@ -41,10 +41,17 @@ static struct kvm_arm_target target_cortex_a57 = {
>>   .init   = arm_cpu__vcpu_init,
>>  };
>>
>> +static struct kvm_arm_target target_potenza = {
>> + .id = KVM_ARM_TARGET_XGENE_POTENZA,
>> + .compatible = "arm,arm-v8",
>> + .init   = arm_cpu__vcpu_init,
>> +};
>
> This implies you have the same PPIs for the arch-timer as the Cortex-A CPUs.
> Is that right?

Currently, KVM ARM64 provides PPI27 as arch-timer IRQ for all target types.

This will have to change if KVM ARM64 starts using different
arch-timer PPI based on target type.

--
Anup

>
> Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] kvmtool: ARM: Use KVM_ARM_PREFERRED_TARGET vm ioctl to determine target cpu

2014-08-07 Thread Peter Maydell
On 7 August 2014 09:44, Anup Patel  wrote:
> The KVM_ARM_PREFERRED_TARGET ioctl is available from 3.13 onwards.
>
> I think we should first try KVM_ARM_PREFERRED_TARGET. If it fails then
> we should fallback to old method of trying each and every target type.

You don't need to try every target type, only the ones which
were implemented prior to PREFERRED_TARGET. So that's
just TARGET_CORTEX_A15 for 32 bit and TARGET_AEM_V8,
TARGET_FOUNDATION_V8 and TARGET_CORTEX_A57
for 64 bit.

thanks
-- PMM
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/5] kvmtool: ARM64: Fix compile error for aarch64

2014-08-07 Thread Anup Patel
On 6 August 2014 18:20, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 09:49:56AM +0100, Anup Patel wrote:
>> The __ARM64_SYS_REG() macro is already defined in uapi/asm/kvm.h
>> of Linux-3.16-rcX hence remove it from arm/aarch64/kvm-cpu.c
>
> I've been carrying a similar patch in my kvmtool/arm branch, but upstream
> kvmtool is still based on 3.13, so this isn't needed at the moment.
>
> Do you have a need for Pekka to merge in the latest kernel sources?
>
> Will

Yes, we should sync up KVMTOOL with the latest kernel sources.

I want to be able to shutdown VM when using KVMTOOL. To do
this we need to use PSCI-v0.2 from Guest kernel.

--
Anup
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/5] kvmtool: ARM: Use KVM_ARM_PREFERRED_TARGET vm ioctl to determine target cpu

2014-08-07 Thread Anup Patel
On 6 August 2014 18:18, Will Deacon  wrote:
> On Tue, Aug 05, 2014 at 09:49:55AM +0100, Anup Patel wrote:
>> Instead, of trying out each and every target type we should use
>> KVM_ARM_PREFERRED_TARGET vm ioctl to determine target type
>> for KVM ARM/ARM64.
>>
>> We bail-out target type returned by KVM_ARM_PREFERRED_TARGET vm ioctl
>> is not known to kvmtool.
>
> -ENOPARSE

OK, I will fix the wording here.

>
>> Signed-off-by: Pranavkumar Sawargaonkar 
>> Signed-off-by: Anup Patel 
>> ---
>>  tools/kvm/arm/kvm-cpu.c |   21 -
>>  1 file changed, 16 insertions(+), 5 deletions(-)
>>
>> diff --git a/tools/kvm/arm/kvm-cpu.c b/tools/kvm/arm/kvm-cpu.c
>> index aeaa4cf..7478f8f 100644
>> --- a/tools/kvm/arm/kvm-cpu.c
>> +++ b/tools/kvm/arm/kvm-cpu.c
>> @@ -34,6 +34,7 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, 
>> unsigned long cpu_id)
>>   struct kvm_cpu *vcpu;
>>   int coalesced_offset, mmap_size, err = -1;
>>   unsigned int i;
>> + struct kvm_vcpu_init preferred_init;
>>   struct kvm_vcpu_init vcpu_init = {
>>   .features = ARM_VCPU_FEATURE_FLAGS(kvm, cpu_id)
>>   };
>> @@ -46,6 +47,10 @@ struct kvm_cpu *kvm_cpu__arch_init(struct kvm *kvm, 
>> unsigned long cpu_id)
>>   if (vcpu->vcpu_fd < 0)
>>   die_perror("KVM_CREATE_VCPU ioctl");
>>
>> + err = ioctl(kvm->vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
>> + if (err < 0)
>> + die_perror("KVM_ARM_PREFERRED_TARGET ioctl");
>
> Is this ioctl always available? If not, I don't like dying here as that
> could cause a regression under older hosts.

The KVM_ARM_PREFERRED_TARGET ioctl is available from 3.13 onwards.

I think we should first try KVM_ARM_PREFERRED_TARGET. If it fails then
we should fall back to the old method of trying each and every target type.
What do you say?

--
Anup

>
> Will
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH/RFC] KVM: track pid for VCPU only on KVM_RUN ioctl

2014-08-07 Thread Raghavendra K T

On 08/05/2014 08:14 PM, Christian Borntraeger wrote:

We currently track the pid of the task that runs the VCPU in
vcpu_load. Since we call vcpu_load for all kinds of ioctls on a
CPU, this causes hiccups due to synchronize_rcu if one CPU is
modified by another CPU or the main thread (e.g. initialization,
reset). We track the pid only for the purpose of yielding, so
let's update the pid only in the KVM_RUN ioctl.

In addition, don't do a synchronize_rcu on startup (pid == 0).

This speeds up guest boot time on s390 noticeably for some configs, e.g.
HZ=100, no full state tracking, 64 guest cpus 32 host cpus.

Signed-off-by: Christian Borntraeger 
CC: Rik van Riel 
CC: Raghavendra K T 
CC: Michael Mueller 
---


Please feel free to add
Reviewed-by: Raghavendra K T 

I could see a very small improvement while testing a 32-vcpu guest boot
on x86 (16-pcpu host + HT).

I was just wondering whether somebody implementing vcpu hot plug would
have to bother about this change, but could not see any. What do you
think?

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html