date:20080928

kvm: External module: Compile irq_comm.c into external modules

2008-09-28 Thread Zhang, Xiantao

If the vt-d patchset is picked up, please apply this patch to
userspace.git for external module support. 
>From 36c21b2cb81077b176c7447ccc77ce2823c79af5 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Mon, 29 Sep 2008 11:54:31 +0800
Subject: [PATCH] kvm: External module: Compile irq_comm.c into external
modules

Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 kernel/ia64/Kbuild |2 +-
 kernel/x86/Kbuild  |2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/kernel/ia64/Kbuild b/kernel/ia64/Kbuild
index 8561476..e9660ba 100644
--- a/kernel/ia64/Kbuild
+++ b/kernel/ia64/Kbuild
@@ -1,7 +1,7 @@
 obj-m := kvm.o kvm-intel.o
 
 kvm-objs := kvm_main.o ioapic.o coalesced_mmio.o kvm-ia64.o kvm_fw.o \
-   ../anon_inodes.o ../external-module-compat.o
+   irq_comm.o ../anon_inodes.o ../external-module-compat.o
 
 EXTRA_CFLAGS_vcpu.o += -mfixed-range=f2-f5,f12-f127
 kvm-intel-objs := vmm.o vmm_ivt.o trampoline.o vcpu.o optvfault.o
mmio.o \
diff --git a/kernel/x86/Kbuild b/kernel/x86/Kbuild
index e0db12b..2369d00 100644
--- a/kernel/x86/Kbuild
+++ b/kernel/x86/Kbuild
@@ -4,7 +4,7 @@ include $(obj)/../config.kbuild
 
 obj-m := kvm.o kvm-intel.o kvm-amd.o
 kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o ../anon_inodes.o irq.o
i8259.o \
-lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o \
+lapic.o ioapic.o preempt.o i8254.o coalesced_mmio.o irq_comm.o
\
 ../external-module-compat.o
 ifeq ($(EXT_CONFIG_KVM_TRACE),y)
 kvm-objs += kvm_trace.o
-- 
1.5.1





0001-kvm-External-module-Compile-irq_comm.c-into-extern.patch
Description: 0001-kvm-External-module-Compile-irq_comm.c-into-extern.patch

[PATCH 8/8]kvm/ia64: Add intel iommu support for guests.

2008-09-28 Thread Zhang, Xiantao

>From f9d62c51e6c73f62c865b7f5b45440a2d3add4bb Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Mon, 29 Sep 2008 11:00:57 +0800
Subject: [PATCH] kvm/ia64: Add intel iommu support for guests.

With intel iommu hardware, we can assign devices to kvm/ia64 guests.
Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/ia64/kvm/Makefile   |4 
 arch/ia64/kvm/kvm-ia64.c |9 +
 2 files changed, 13 insertions(+), 0 deletions(-)

diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index 3b1a1c1..cf37f8f 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -46,6 +46,10 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
coalesced_mmio.o irq_comm.o)
 
+ifeq ($(CONFIG_DMAR),y)
+common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+endif
+
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
 
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d3c35eb..3f80eaf 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -31,6 +31,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -187,6 +188,9 @@ int kvm_dev_ioctl_check_extension(long ext)
case KVM_CAP_COALESCED_MMIO:
r = KVM_COALESCED_MMIO_PAGE_OFFSET;
break;
+   case KVM_CAP_IOMMU:
+   r = intel_iommu_found();
+   break;
default:
r = 0;
}
@@ -773,6 +777,7 @@ static void kvm_init_vm(struct kvm *kvm)
 */
kvm_build_io_pmt(kvm);
 
+   INIT_LIST_HEAD(&kvm->arch.assigned_dev_head);
 }
 
 struct  kvm *kvm_arch_create_vm(void)
@@ -1336,6 +1341,10 @@ static void kvm_release_vm_pages(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
+   kvm_iommu_unmap_guest(kvm);
+#ifdef KVM_CAP_DEVICE_ASSIGNMENT
+   kvm_free_all_assigned_devices(kvm);
+#endif
kfree(kvm->arch.vioapic);
kvm_release_vm_pages(kvm);
kvm_free_physmem(kvm);
-- 
1.5.1





0008-kvm-ia64-Add-intel-iommu-support-for-guests.patch
Description: 0008-kvm-ia64-Add-intel-iommu-support-for-guests.patch

[PATCH 7/8]kvm/ia64: Add directed mmio range support for kvm guests.

2008-09-28 Thread Zhang, Xiantao

>From a9a2b3445876d788f076893c471f03c4abcfeddf Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Sun, 28 Sep 2008 01:39:46 -0700
Subject: [PATCH] kvm/ia64: Add directed mmio range support for kvm
guests.

Using vt-d, kvm guests can be assigned physical devices, so
this patch introduces a new mmio type (directed mmio)
to handle its mmio access.
Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/ia64/include/asm/kvm_host.h |2 +-
 arch/ia64/kvm/kvm-ia64.c |4 ++--
 arch/ia64/kvm/vcpu.h |   26 +-
 arch/ia64/kvm/vtlb.c |   23 +--
 4 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/arch/ia64/include/asm/kvm_host.h
b/arch/ia64/include/asm/kvm_host.h
index da579a3..85db124 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -132,7 +132,7 @@
 #define GPFN_IOSAPIC(4UL << 60) /* IOSAPIC base */
 #define GPFN_LEGACY_IO  (5UL << 60) /* Legacy I/O base */
 #define GPFN_GFW(6UL << 60) /* Guest Firmware */
-#define GPFN_HIGH_MMIO  (7UL << 60) /* High MMIO range */
+#define GPFN_PHYS_MMIO  (7UL << 60) /* Directed MMIO Range */
 
 #define GPFN_IO_MASK(7UL << 60) /* Guest pfn is I/O type */
 #define GPFN_INV_MASK   (1UL << 63) /* Guest pfn is invalid */
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 20622d6..d3c35eb 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1446,11 +1446,11 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
if (!kvm_is_mmio_pfn(pfn)) {
kvm_set_pmt_entry(kvm, base_gfn + i,
pfn << PAGE_SHIFT,
-   _PAGE_MA_WB);
+   _PAGE_AR_RWX | _PAGE_MA_WB);
memslot->rmap[i] = (unsigned
long)pfn_to_page(pfn);
} else {
kvm_set_pmt_entry(kvm, base_gfn + i,
-   GPFN_LOW_MMIO | (pfn <<
PAGE_SHIFT),
+   GPFN_PHYS_MMIO | (pfn <<
PAGE_SHIFT),
_PAGE_MA_UC);
memslot->rmap[i] = 0;
}
diff --git a/arch/ia64/kvm/vcpu.h b/arch/ia64/kvm/vcpu.h
index b0fcfb6..341e3fe 100644
--- a/arch/ia64/kvm/vcpu.h
+++ b/arch/ia64/kvm/vcpu.h
@@ -313,21 +313,21 @@ static inline void vcpu_set_tr(struct thash_data
*trp, u64 pte, u64 itir,
trp->rid = rid;
 }
 
-extern u64 kvm_lookup_mpa(u64 gpfn);
-extern u64 kvm_gpa_to_mpa(u64 gpa);
-
-/* Return I/O type if trye */
-#define __gpfn_is_io(gpfn) \
-   ({  \
-u64 pte, ret = 0;  \
-pte = kvm_lookup_mpa(gpfn);\
-if (!(pte & GPFN_INV_MASK))\
-ret = pte & GPFN_IO_MASK;  \
-ret;   \
-})
+extern u64 kvm_get_mpt_entry(u64 gpfn);
 
+/* Return I/ */
+static inline u64 __gpfn_is_io(u64 gpfn)
+{
+   u64  pte;
+   pte = kvm_get_mpt_entry(gpfn);
+   if (!(pte & GPFN_INV_MASK)) {
+   pte = pte & GPFN_IO_MASK;
+   if (pte != GPFN_PHYS_MMIO)
+   return pte;
+   }
+   return 0;
+}
 #endif
-
 #define IA64_NO_FAULT  0
 #define IA64_FAULT 1
 
diff --git a/arch/ia64/kvm/vtlb.c b/arch/ia64/kvm/vtlb.c
index def4576..dc58dac 100644
--- a/arch/ia64/kvm/vtlb.c
+++ b/arch/ia64/kvm/vtlb.c
@@ -390,7 +390,7 @@ void thash_purge_entries_remote(struct kvm_vcpu *v,
u64 va, u64 ps)
 
 u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
 {
-   u64 ps, ps_mask, paddr, maddr;
+   u64 ps, ps_mask, paddr, maddr, io_mask;
union pte_flags phy_pte;
 
ps = itir_ps(itir);
@@ -398,8 +398,9 @@ u64 translate_phy_pte(u64 *pte, u64 itir, u64 va)
phy_pte.val = *pte;
paddr = *pte;
paddr = ((paddr & _PAGE_PPN_MASK) & ps_mask) | (va & ~ps_mask);
-   maddr = kvm_lookup_mpa(paddr >> PAGE_SHIFT);
-   if (maddr & GPFN_IO_MASK) {
+   maddr = kvm_get_mpt_entry(paddr >> PAGE_SHIFT);
+   io_mask = maddr & GPFN_IO_MASK;
+   if (io_mask && (io_mask != GPFN_PHYS_MMIO)) {
*pte |= VTLB_PTE_IO;
return -1;
}
@@ -418,7 +419,7 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64
pte, u64 itir,
u64 ifa, int type)
 {
u64 ps;
-   u64 phy_pte;
+   u64 phy_pte, io_mask, index;
union ia64_rr vrr, mrr;
int ret = 0;
 
@@ -426,13 +427,16 @@ int thash_purge_and_insert(struct kvm_vcpu *v, u64
pte, u64 itir,
vrr.val = vcpu_get_rr(v, ifa);
mrr.val = ia64_get_rr(ifa);
 
+   index = (pte & _PAGE_PPN_MASK) >> PAGE_SHIFT;
+   io_mask = kvm_get_mpt_entry(index) & GPFN_IO_MASK;
phy_pte = translate_phy_pte(&pt

[PATCH 6/8]kvm/ia64: Make pmt table be able to hold physical mmio entries.

2008-09-28 Thread Zhang, Xiantao

>From c459cae4b89b445a2b85be915b269676b6ff394f Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Sat, 27 Sep 2008 12:52:35 +0800
Subject: [PATCH] kvm/ia64: Make pmt table be able to hold physical mmio
entries.

Don't try to do put_page once the entries are mmio.
Set the tag to indicate the mmio space for vmm setting
TLB's memory attribute.
Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/ia64/kvm/kvm-ia64.c |   20 +---
 1 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index a6cf719..20622d6 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1437,17 +1437,23 @@ int kvm_arch_set_memory_region(struct kvm *kvm,
int user_alloc)
 {
unsigned long i;
-   struct page *page;
+   unsigned long pfn;
int npages = mem->memory_size >> PAGE_SHIFT;
struct kvm_memory_slot *memslot = &kvm->memslots[mem->slot];
unsigned long base_gfn = memslot->base_gfn;
-
for (i = 0; i < npages; i++) {
-   page = gfn_to_page(kvm, base_gfn + i);
-   kvm_set_pmt_entry(kvm, base_gfn + i,
-   page_to_pfn(page) << PAGE_SHIFT,
-   _PAGE_AR_RWX|_PAGE_MA_WB);
-   memslot->rmap[i] = (unsigned long)page;
+   pfn = gfn_to_pfn(kvm, base_gfn + i);
+   if (!kvm_is_mmio_pfn(pfn)) {
+   kvm_set_pmt_entry(kvm, base_gfn + i,
+   pfn << PAGE_SHIFT,
+   _PAGE_MA_WB);
+   memslot->rmap[i] = (unsigned
long)pfn_to_page(pfn);
+   } else {
+   kvm_set_pmt_entry(kvm, base_gfn + i,
+   GPFN_LOW_MMIO | (pfn <<
PAGE_SHIFT),
+   _PAGE_MA_UC);
+   memslot->rmap[i] = 0;
+   }
}
 
return 0;
-- 
1.5.1





0006-kvm-ia64-Make-pmt-table-be-able-to-hold-physical-mm.patch
Description: 0006-kvm-ia64-Make-pmt-table-be-able-to-hold-physical-mm.patch

[PATCH 5/8]kvm: Moving irqchip_in_kernel from ioapic.h to irq.h

2008-09-28 Thread Zhang, Xiantao

>From 458f35267872176ed470fd6c5156ccc874ce1dfe Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Sat, 27 Sep 2008 11:46:36 +0800
Subject: [PATCH] kvm: Moving irqchip_in_kernel from ioapic.h to irq.h

Moving irqchip_in_kernel from ioapic.h to irq.h.

Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/ia64/kvm/irq.h  |4 
 arch/ia64/kvm/kvm-ia64.c |1 +
 virt/kvm/ioapic.h|7 ---
 3 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index 604329a..c6786e8 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,5 +23,9 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
+static inline int irqchip_in_kernel(struct kvm *kvm)
+{
+   return 1;
+}
 
 #endif
diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index 7ad759e..a6cf719 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -45,6 +45,7 @@
 #include "iodev.h"
 #include "ioapic.h"
 #include "lapic.h"
+#include "irq.h"
 
 static unsigned long kvm_vmm_base;
 static unsigned long kvm_vsa_base;
diff --git a/virt/kvm/ioapic.h b/virt/kvm/ioapic.h
index b52732f..cd7ae76 100644
--- a/virt/kvm/ioapic.h
+++ b/virt/kvm/ioapic.h
@@ -79,13 +79,6 @@ static inline struct kvm_ioapic
*ioapic_irqchip(struct kvm *kvm)
return kvm->arch.vioapic;
 }
 
-#ifdef CONFIG_IA64
-static inline int irqchip_in_kernel(struct kvm *kvm)
-{
-   return 1;
-}
-#endif
-
 struct kvm_vcpu *kvm_get_lowest_prio_vcpu(struct kvm *kvm, u8 vector,
   unsigned long bitmap);
 void kvm_ioapic_update_eoi(struct kvm *kvm, int vector, int
trigger_mode);
-- 
1.5.1



0005-kvm-Moving-irqchip_in_kernel-from-ioapic.h-to-irq.h.patch
Description: 0005-kvm-Moving-irqchip_in_kernel-from-ioapic.h-to-irq.h.patch

[PATCH 4/8]kvm: Split arch/x86/kvm/irq.c to two parts.

2008-09-28 Thread Zhang, Xiantao

>From bb0c01b997d16ff1c1b9b0e797a581577c385b54 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Mon, 29 Sep 2008 10:59:30 +0800
Subject: [PATCH]  kvm: Split arch/x86/kvm/irq.c to two parts.

Moving irq ack notification logic as common, and make
it shared with ia64 side.

Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/ia64/include/asm/kvm_host.h |4 
 arch/ia64/kvm/Makefile   |2 +-
 arch/ia64/kvm/irq.h  |5 -
 arch/x86/kvm/Makefile|2 +-
 arch/x86/kvm/irq.c   |   33
-
 arch/x86/kvm/irq.h   |8 
 include/asm-x86/kvm_host.h   |2 ++
 include/linux/kvm_host.h |6 ++
 8 files changed, 14 insertions(+), 48 deletions(-)

diff --git a/arch/ia64/include/asm/kvm_host.h
b/arch/ia64/include/asm/kvm_host.h
index 1efe513..da579a3 100644
--- a/arch/ia64/include/asm/kvm_host.h
+++ b/arch/ia64/include/asm/kvm_host.h
@@ -413,6 +413,10 @@ struct kvm_arch {
struct kvm_ioapic *vioapic;
struct kvm_vm_stat stat;
struct kvm_sal_data rdv_sal_data;
+
+   struct list_head assigned_dev_head;
+   struct dmar_domain *intel_iommu_domain;
+   struct hlist_head irq_ack_notifier_list;
 };
 
 union cpuid3_t {
diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile
index bf22fb9..3b1a1c1 100644
--- a/arch/ia64/kvm/Makefile
+++ b/arch/ia64/kvm/Makefile
@@ -44,7 +44,7 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-   coalesced_mmio.o)
+   coalesced_mmio.o irq_comm.o)
 
 kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o
 obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/ia64/kvm/irq.h b/arch/ia64/kvm/irq.h
index f2e6545..604329a 100644
--- a/arch/ia64/kvm/irq.h
+++ b/arch/ia64/kvm/irq.h
@@ -23,10 +23,5 @@
 #ifndef __IRQ_H
 #define __IRQ_H
 
-struct kvm;
-
-static inline void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
-{
-}
 
 #endif
diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 7dce593..c023435 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -3,7 +3,7 @@
 #
 
 common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
-coalesced_mmio.o)
+coalesced_mmio.o irq_comm.o)
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
diff --git a/arch/x86/kvm/irq.c b/arch/x86/kvm/irq.c
index 8c1b9c5..c019b8e 100644
--- a/arch/x86/kvm/irq.c
+++ b/arch/x86/kvm/irq.c
@@ -99,36 +99,3 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu)
__kvm_migrate_apic_timer(vcpu);
__kvm_migrate_pit_timer(vcpu);
 }
-
-/* This should be called with the kvm->lock mutex held */
-void kvm_set_irq(struct kvm *kvm, int irq, int level)
-{
-   /* Not possible to detect if the guest uses the PIC or the
-* IOAPIC.  So set the bit in both. The guest will ignore
-* writes to the unused one.
-*/
-   kvm_ioapic_set_irq(kvm->arch.vioapic, irq, level);
-   kvm_pic_set_irq(pic_irqchip(kvm), irq, level);
-}
-
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi)
-{
-   struct kvm_irq_ack_notifier *kian;
-   struct hlist_node *n;
-
-   hlist_for_each_entry(kian, n, &kvm->arch.irq_ack_notifier_list,
link)
-   if (kian->gsi == gsi)
-   kian->irq_acked(kian);
-}
-
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-  struct kvm_irq_ack_notifier *kian)
-{
-   hlist_add_head(&kian->link, &kvm->arch.irq_ack_notifier_list);
-}
-
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-struct kvm_irq_ack_notifier *kian)
-{
-   hlist_del(&kian->link);
-}
diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
index 4748532..f17c8f5 100644
--- a/arch/x86/kvm/irq.h
+++ b/arch/x86/kvm/irq.h
@@ -68,7 +68,6 @@ struct kvm_pic {
 };
 
 struct kvm_pic *kvm_create_pic(struct kvm *kvm);
-void kvm_pic_set_irq(void *opaque, int irq, int level);
 int kvm_pic_read_irq(struct kvm *kvm);
 void kvm_pic_update_irq(struct kvm_pic *s);
 void kvm_pic_clear_isr_ack(struct kvm *kvm);
@@ -85,13 +84,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
 
 void kvm_pic_reset(struct kvm_kpic_state *s);
 
-void kvm_set_irq(struct kvm *kvm, int irq, int level);
-void kvm_notify_acked_irq(struct kvm *kvm, unsigned gsi);
-void kvm_register_irq_ack_notifier(struct kvm *kvm,
-  struct kvm_irq_ack_notifier *kian);
-void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
-struct kvm_irq_ack_notifier *kian);
-
 void kvm_timer_intr_post(struct kvm_vcpu *vcpu, int vec);
 void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
 void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
diff --git a/include/asm-x86/kvm_host.h

[PATCH 3/8]kvm: Changing is_mmio_pfn to kvm_is_mmio_pfn, and make it common

2008-09-28 Thread Zhang, Xiantao

>From 3211cc7a86ba050114733f847361478ab25619d7 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Sat, 27 Sep 2008 10:55:40 +0800
Subject: [PATCH] kvm: Changing is_mmio_pfn to kvm_is_mmio_pfn, and make
it common

for all archs.

Using kvm prefix to avoid polluting kernel's name space.
Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 include/asm-x86/kvm_host.h |2 --
 include/linux/kvm_host.h   |2 ++
 virt/kvm/kvm_main.c|   16 
 virt/kvm/vtd.c |4 ++--
 4 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 1b114ca..b507b8f 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -502,8 +502,6 @@ int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t
gpa,
 int kvm_pv_mmu_op(struct kvm_vcpu *vcpu, unsigned long bytes,
  gpa_t addr, unsigned long *ret);
 
-int is_mmio_pfn(pfn_t pfn);
-
 extern bool tdp_enabled;
 
 enum emulation_result {
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 10c1146..b3b7598 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -288,6 +288,8 @@ int kvm_cpu_has_interrupt(struct kvm_vcpu *v);
 int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu);
 void kvm_vcpu_kick(struct kvm_vcpu *vcpu);
 
+int kvm_is_mmio_pfn(pfn_t pfn);
+
 struct kvm_irq_ack_notifier {
struct hlist_node link;
unsigned gsi;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 59e08a4..cf0ab8e 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -316,7 +316,7 @@ static inline int valid_vcpu(int n)
return likely(n >= 0 && n < KVM_MAX_VCPUS);
 }
 
-inline int is_mmio_pfn(pfn_t pfn)
+inline int kvm_is_mmio_pfn(pfn_t pfn)
 {
if (pfn_valid(pfn))
return PageReserved(pfn_to_page(pfn));
@@ -994,7 +994,7 @@ pfn_t gfn_to_pfn(struct kvm *kvm, gfn_t gfn)
 
pfn = ((addr - vma->vm_start) >> PAGE_SHIFT) +
vma->vm_pgoff;
up_read(&current->mm->mmap_sem);
-   BUG_ON(!is_mmio_pfn(pfn));
+   BUG_ON(!kvm_is_mmio_pfn(pfn));
} else
pfn = page_to_pfn(page[0]);
 
@@ -1008,10 +1008,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t
gfn)
pfn_t pfn;
 
pfn = gfn_to_pfn(kvm, gfn);
-   if (!is_mmio_pfn(pfn))
+   if (!kvm_is_mmio_pfn(pfn))
return pfn_to_page(pfn);
 
-   WARN_ON(is_mmio_pfn(pfn));
+   WARN_ON(kvm_is_mmio_pfn(pfn));
 
get_page(bad_page);
return bad_page;
@@ -1027,7 +1027,7 @@ EXPORT_SYMBOL_GPL(kvm_release_page_clean);
 
 void kvm_release_pfn_clean(pfn_t pfn)
 {
-   if (!is_mmio_pfn(pfn))
+   if (!kvm_is_mmio_pfn(pfn))
put_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_release_pfn_clean);
@@ -1053,7 +1053,7 @@ EXPORT_SYMBOL_GPL(kvm_set_page_dirty);
 
 void kvm_set_pfn_dirty(pfn_t pfn)
 {
-   if (!is_mmio_pfn(pfn)) {
+   if (!kvm_is_mmio_pfn(pfn)) {
struct page *page = pfn_to_page(pfn);
if (!PageReserved(page))
SetPageDirty(page);
@@ -1063,14 +1063,14 @@ EXPORT_SYMBOL_GPL(kvm_set_pfn_dirty);
 
 void kvm_set_pfn_accessed(pfn_t pfn)
 {
-   if (!is_mmio_pfn(pfn))
+   if (!kvm_is_mmio_pfn(pfn))
mark_page_accessed(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_set_pfn_accessed);
 
 void kvm_get_pfn(pfn_t pfn)
 {
-   if (!is_mmio_pfn(pfn))
+   if (!kvm_is_mmio_pfn(pfn))
get_page(pfn_to_page(pfn));
 }
 EXPORT_SYMBOL_GPL(kvm_get_pfn);
diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c
index 667bf3f..b3f4948 100644
--- a/virt/kvm/vtd.c
+++ b/virt/kvm/vtd.c
@@ -48,11 +48,11 @@ int kvm_iommu_map_pages(struct kvm *kvm,
/* check if already mapped */
pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
 gfn_to_gpa(gfn));
-   if (pfn && !is_mmio_pfn(pfn))
+   if (pfn && !kvm_is_mmio_pfn(pfn))
continue;
 
pfn = gfn_to_pfn(kvm, gfn);
-   if (!is_mmio_pfn(pfn)) {
+   if (!kvm_is_mmio_pfn(pfn)) {
r = intel_iommu_page_mapping(domain,
 gfn_to_gpa(gfn),
 pfn_to_hpa(pfn),
-- 
1.5.1





0003-kvm-Changing-is_mmio_pfn-to-kvm_is_mmio_pfn-and-ma.patch
Description: 0003-kvm-Changing-is_mmio_pfn-to-kvm_is_mmio_pfn-and-ma.patch

[PATCH 2/8]kvm: Moving device_assignment logic to kvm_main.c

2008-09-28 Thread Zhang, Xiantao

>From 6840c86b777e4a561cc3df7222cf5eb0b0bb9226 Mon Sep 17 00:00:00 2001
From: Xiantao Zhang <[EMAIL PROTECTED]>
Date: Sat, 27 Sep 2008 10:59:36 +0800
Subject: [PATCH] kvm: Moving device_assignment logic to kvm_main.c

To share with other archs, this patch moves device_assignment
logic to common parts.
Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c   |  255
---
 include/linux/kvm.h  |2 +
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |  268
+-
 4 files changed, 269 insertions(+), 257 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 4cfdd1b..aee0e37 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -30,7 +30,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 #include 
@@ -107,238 +106,6 @@ struct kvm_stats_debugfs_item debugfs_entries[] =
{
{ NULL }
 };
 
-static struct kvm_assigned_dev_kernel *kvm_find_assigned_dev(struct
list_head *head,
- int
assigned_dev_id)
-{
-   struct list_head *ptr;
-   struct kvm_assigned_dev_kernel *match;
-
-   list_for_each(ptr, head) {
-   match = list_entry(ptr, struct kvm_assigned_dev_kernel,
list);
-   if (match->assigned_dev_id == assigned_dev_id)
-   return match;
-   }
-   return NULL;
-}
-
-static void kvm_assigned_dev_interrupt_work_handler(struct work_struct
*work)
-{
-   struct kvm_assigned_dev_kernel *assigned_dev;
-
-   assigned_dev = container_of(work, struct
kvm_assigned_dev_kernel,
-   interrupt_work);
-
-   /* This is taken to safely inject irq inside the guest. When
-* the interrupt injection (or the ioapic code) uses a
-* finer-grained lock, update this
-*/
-   mutex_lock(&assigned_dev->kvm->lock);
-   kvm_set_irq(assigned_dev->kvm,
-   assigned_dev->guest_irq, 1);
-   mutex_unlock(&assigned_dev->kvm->lock);
-   kvm_put_kvm(assigned_dev->kvm);
-}
-
-/* FIXME: Implement the OR logic needed to make shared interrupts on
- * this line behave properly
- */
-static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
-{
-   struct kvm_assigned_dev_kernel *assigned_dev =
-   (struct kvm_assigned_dev_kernel *) dev_id;
-
-   kvm_get_kvm(assigned_dev->kvm);
-   schedule_work(&assigned_dev->interrupt_work);
-   disable_irq_nosync(irq);
-   return IRQ_HANDLED;
-}
-
-/* Ack the irq line for an assigned device */
-static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian)
-{
-   struct kvm_assigned_dev_kernel *dev;
-
-   if (kian->gsi == -1)
-   return;
-
-   dev = container_of(kian, struct kvm_assigned_dev_kernel,
-  ack_notifier);
-   kvm_set_irq(dev->kvm, dev->guest_irq, 0);
-   enable_irq(dev->host_irq);
-}
-
-static void kvm_free_assigned_device(struct kvm *kvm,
-struct kvm_assigned_dev_kernel
-*assigned_dev)
-{
-   if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
-   free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-
-   kvm_unregister_irq_ack_notifier(kvm,
&assigned_dev->ack_notifier);
-
-   if (cancel_work_sync(&assigned_dev->interrupt_work))
-   /* We had pending work. That means we will have to take
-* care of kvm_put_kvm.
-*/
-   kvm_put_kvm(kvm);
-
-   pci_release_regions(assigned_dev->dev);
-   pci_disable_device(assigned_dev->dev);
-   pci_dev_put(assigned_dev->dev);
-
-   list_del(&assigned_dev->list);
-   kfree(assigned_dev);
-}
-
-static void kvm_free_all_assigned_devices(struct kvm *kvm)
-{
-   struct list_head *ptr, *ptr2;
-   struct kvm_assigned_dev_kernel *assigned_dev;
-
-   list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
-   assigned_dev = list_entry(ptr,
- struct
kvm_assigned_dev_kernel,
- list);
-
-   kvm_free_assigned_device(kvm, assigned_dev);
-   }
-}
-
-static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
-  struct kvm_assigned_irq
-  *assigned_irq)
-{
-   int r = 0;
-   struct kvm_assigned_dev_kernel *match;
-
-   mutex_lock(&kvm->lock);
-
-   match = kvm_find_assigned_dev(&kvm->arch.assigned_dev_head,
- assigned_irq->assigned_dev_id);
-   if (!match) {
-   mutex_unlock(&kvm->lock);
-   return -EINVAL;
-   }
-
-   if (match->irq_requested) {
-   match->guest_irq = assigned_irq->guest_irq;
-   match->ack_notifier.gsi = assigned_irq->guest_irq;
-

[PATCH 1/8]kvm/vt-d: Moving vtd.c from arch/x86/kvm/ to virt/kvm/

2008-09-28 Thread Zhang, Xiantao

>From c7e6b365ce145caead0355b87e873c3180a47c5b Mon Sep 17 00:00:00 2001
From: Zhang xiantao <[EMAIL PROTECTED]>
Date: Thu, 11 Sep 2008 13:19:32 +0800
Subject: [PATCH] kvm/vt-d: Moving vtd.c from arch/x86/kvm/ to virt/kvm/

Preparation for kvm/ia64 VT-d support.

Signed-off-by: Zhang xiantao <[EMAIL PROTECTED]>
---
 arch/x86/kvm/Makefile|6 +++---
 {arch/x86 => virt}/kvm/vtd.c |0 
 2 files changed, 3 insertions(+), 3 deletions(-)
 rename {arch/x86 => virt}/kvm/vtd.c (100%)

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index 3072b17..7dce593 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -7,14 +7,14 @@ common-objs = $(addprefix ../../../virt/kvm/,
kvm_main.o ioapic.o \
 ifeq ($(CONFIG_KVM_TRACE),y)
 common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o)
 endif
+ifeq ($(CONFIG_DMAR),y)
+common-objs += $(addprefix ../../../virt/kvm/, vtd.o)
+endif
 
 EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o
lapic.o \
i8254.o
-ifeq ($(CONFIG_DMAR),y)
-kvm-objs += vtd.o
-endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/virt/kvm/vtd.c
similarity index 100%
rename from arch/x86/kvm/vtd.c
rename to virt/kvm/vtd.c
-- 
1.5.1



0001-kvm-vt-d-Moving-vtd.c-from-arch-x86-kvm-to-virt-kv.patch
Description: 0001-kvm-vt-d-Moving-vtd.c-from-arch-x86-kvm-to-virt-kv.patch

[PATCH 0/8] Patchset to enable vt-d support for kvm/ia64.

2008-09-28 Thread Zhang, Xiantao

In order to enable vt-d support for kvm/ia64 guests, I worked out the
patchset to make it happen. Please review. The first five patches have
no changes for logic and just do code move.
Xiantao
[PATCH 1/8] kvm/vt-d: Moving vtd.c from arch/x86/kvm/ to virt/kvm/
[PATCH 2/8] kvm: Moving device_assignment logic to kvm_main.c
[PATCH 3/8] kvm: Changing is_mmio_pfn to kvm_is_mmio_pfn, and make it
common
[PATCH 4/8] kvm: Split arch/x86/kvm/irq.c to two parts.
[PATCH 5/8] kvm: Moving irqchip_in_kernel from ioapic.h to irq.h
[PATCH 6/8] kvm/ia64: Make pmt table be able to hold physical mmio
entries.
[PATCH 7/8] kvm/ia64: Add directed mmio range support for kvm guests.
[PATCH 8/8] kvm/ia64: Add intel iommu support for guests
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

installing kvm-76 on 2.6.24

2008-09-28 Thread Sterling Windmill

I am running a custom compiled 2.6.24 kernel on a 64-bit Intel system and had 
no issues compiling and running kvm-75. 

I downloaded kvm-76 and compiled and installed it, but I cannot get the modules 
to load properly. 

I have temporarily gone back to kvm-75. 

I am seeing this output in dmesg: 

kvm: Unknown symbol kvm_iommu_map_pages 
kvm: Unknown symbol kvm_iommu_map_guest 
kvm: Unknown symbol kvm_iommu_unmap_guest 
kvm_intel: Unknown symbol kvm_clear_guest_page 
kvm_intel: Unknown symbol kvm_exit 
kvm_intel: Unknown symbol kvm_init 
kvm_intel: Unknown symbol kvm_enable_efer_bits 
kvm_intel: Unknown symbol kvm_timer_intr_post 
kvm_intel: Unknown symbol kvm_mmu_set_nonpresent_ptes 
kvm_intel: Unknown symbol gfn_to_page 
kvm_intel: Unknown symbol segment_base 
kvm_intel: Unknown symbol kvm_get_msr_common 
kvm_intel: Unknown symbol __kvm_set_memory_region 
kvm_intel: Unknown symbol kvm_vcpu_uninit 
kvm_intel: Unknown symbol kvm_emulate_halt 
kvm_intel: Unknown symbol kvm_set_apic_base 
kvm_intel: Unknown symbol kvm_report_emulation_failure 
kvm_intel: Unknown symbol kvm_lapic_find_highest_irr 
kvm_intel: Unknown symbol kvm_task_switch 
kvm_intel: Unknown symbol kvm_enable_tdp 
kvm_intel: Unknown symbol kvm_disable_tdp 
kvm_intel: Unknown symbol kvm_lmsw 
kvm_intel: Unknown symbol kvm_set_memory_region 
kvm_intel: Unknown symbol kvm_queue_exception 
kvm_intel: Unknown symbol emulate_instruction 
kvm_intel: Unknown symbol kvm_write_guest_page 
kvm_intel: Unknown symbol fx_init 
kvm_intel: Unknown symbol kvm_cpu_has_interrupt 
kvm_intel: Unknown symbol kvm_lapic_get_cr8 
kvm_intel: Unknown symbol kvm_set_cr3 
kvm_intel: Unknown symbol kvm_get_cr8 
kvm_intel: Unknown symbol kvm_x86_ops 
kvm_intel: Unknown symbol kvm_vcpu_cache 
kvm_intel: Unknown symbol kvm_emulate_hypercall 
kvm_intel: Unknown symbol load_pdptrs 
kvm_intel: Unknown symbol kvm_handle_fault_on_reboot 
kvm_intel: Unknown symbol kvm_mmu_unprotect_page_virt 
kvm_intel: Unknown symbol kvm_set_cr4 
kvm_intel: Unknown symbol kvm_set_cr0 
kvm_intel: Unknown symbol kvm_set_cr8 
kvm_intel: Unknown symbol kvm_lapic_enabled 
kvm_intel: Unknown symbol kvm_mmu_page_fault 
kvm_intel: Unknown symbol kvm_mmu_reset_context 
kvm_intel: Unknown symbol kvm_queue_exception_e 
kvm_intel: Unknown symbol kvm_emulate_cpuid 
kvm_intel: Unknown symbol kvm_vcpu_init 
kvm_intel: Unknown symbol gfn_to_hva 
kvm_intel: Unknown symbol kvm_mmu_invlpg 
kvm_intel: Unknown symbol kvm_set_msr_common 
kvm_intel: Unknown symbol kvm_mmu_set_base_ptes 
kvm_intel: Unknown symbol kvm_cpu_get_interrupt 
kvm_intel: Unknown symbol kvm_emulate_pio 
kvm_intel: Unknown symbol kvm_mmu_set_mask_ptes 
kvm_intel: Unknown symbol kvm_is_error_hva 
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2135076 ] Lilo boot on virtio; keytable read / checksum error

2008-09-28 Thread SourceForge.net

Bugs item #2135076, was opened at 2008-09-29 11:02
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2135076&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Shen Okinudo (okinu)
Assigned to: Nobody/Anonymous (nobody)
Summary: Lilo boot on virtio; keytable read / checksum error

Initial Comment:
When booting a virtio disk using lilo (v1.22.8-3.1ubuntu1) on an intel 64 bit 
Ubuntu 8.04 guest, I get the message "keytable read / checksum error". The host 
system is the same one.

The problem exists since kvm-70 up to and including kvm-76.

The same guest can be booted using kvm-69.

The vm had been created using the qcow2 format.

BTW, the current grub boot loader (0.97-29ubuntu21) does not work either.


--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2135076&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2135061 ] vde support disabled

2008-09-28 Thread SourceForge.net

Bugs item #2135061, was opened at 2008-09-29 10:51
Message generated for change (Tracker Item Submitted) made by Item Submitter
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2135061&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: intel
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Shen Okinudo (okinu)
Assigned to: Nobody/Anonymous (nobody)
Summary: vde support disabled

Initial Comment:
Even when not using the "--disable-vde" configuration flag vde-support shows 
up with "no".

This happens on an Intel 64 bit host.


--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2135061&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ kvm-Bugs-2063072 ] compiling problem with "tcg_ctx"

2008-09-28 Thread SourceForge.net

Bugs item #2063072, was opened at 2008-08-21 06:29
Message generated for change (Comment added) made by okinu
You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2063072&group_id=180599

Please note that this message will contain a full copy of the comment thread,
including the initial issue submission, for this request,
not just the latest update.
Category: qemu
Group: None
Status: Open
Resolution: None
Priority: 5
Private: No
Submitted By: Jana Delego (janado)
Assigned to: Anthony Liguori (aliguori)
Summary: compiling problem with "tcg_ctx"

Initial Comment:
When compiling kvm using the "--disable-cpu-emulation" flag on a 64 bit Intel 
Ubuntu, the compiler aborts with error "undefined reference to tcg_ctx",

This problem exists since kvm-70.


--

Comment By: Shen Okinudo (okinu)
Date: 2008-09-29 10:37

Message:
This bug persists in kvm-76

--

Comment By: Marshal Newrock (freedombi)
Date: 2008-09-02 08:40

Message:
Logged In: YES 
user_id=2201280
Originator: NO

This seems to work with kvm-74.  The patch allowed compilation, and the
guest appears to be running well.

--

Comment By: Amit Shah (amitshah)
Date: 2008-08-29 18:59

Message:
Logged In: YES 
user_id=201894
Originator: NO

I'm not sure if this will make qemu work properly, but it fixes the build
(also attached). Can you confirm if this works?

commit 244cafe6688940c25c81b31aa223c9e24656806e
Author: Amit Shah <[EMAIL PROTECTED]>
Date:   Fri Aug 29 15:20:14 2008 +0530

KVM: QEMU: Fix userspace build with --disable-cpu-emulation

I'm not sure this will work properly, but fixes the build.
ppc might need something like this as well

Signed-off-by: Amit Shah <[EMAIL PROTECTED]>

diff --git a/qemu/target-i386/fake-exec.c b/qemu/target-i386/fake-exec.c
index 737286d..552089b 100644
--- a/qemu/target-i386/fake-exec.c
+++ b/qemu/target-i386/fake-exec.c
@@ -12,6 +12,13 @@
  */
 #include "exec.h"
 #include "cpu.h"
+#include "tcg.h"
+
+/* code generation context */
+TCGContext tcg_ctx;
+
+uint16_t gen_opc_buf[OPC_BUF_SIZE];
+TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];

 int code_copy_enabled = 0;

@@ -45,10 +52,6 @@ int cpu_x86_gen_code(CPUState *env, TranslationBlock
*tb, int *gen_code_size_ptr
 return 0;
 }

-void flush_icache_range(unsigned long start, unsigned long stop)
-{
-}
-
 void optimize_flags_init(void)
 {
 }


File Added: 0001-KVM-QEMU-Fix-userspace-build-with-disable-cpu-em.patch

--

You can respond by visiting: 
https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2063072&group_id=180599
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Status of pci passthrough work?

2008-09-28 Thread Thomas Fjellstrom

On Sunday 28 September 2008, Amit Shah wrote:
> * On Saturday 27 Sep 2008 13:27:46 Thomas Fjellstrom wrote:
> > On Saturday 27 September 2008, Han, Weidong wrote:
> > > Thomas Fjellstrom wrote:
> > > > On Saturday 27 September 2008, Han, Weidong wrote:
> > > >> Hi Thomas,
> > > >>
> > > >> the patches of passthrough/VT-d on kvm.git are already checked in.
> > > >> With Amit's userspace patches, you can assign device to guest. You
> > > >> can have a try.
> > > >
> > > > Does that mean I need VT-d support in hardware? All I have to test
> > > > with right now is an AMD Phenom X4  with a 780g+sb700 system. Don't
> > > > think it has an iommu, and I'd find it odd if the intel VT-d code
> > > > "just worked" on amd's hardware.
> > >
> > > Yes, currently you need VT-d support in hardware to assign device.
> >
> > So I take it the PV-DMA (or pv-dma doesn't do what I think it does...) or
> > the other 1:1 device pass through work isn't working right now?
>
> pvdma does work, but the most recent patches aren't yet published (I should
> do that). It will work for simple devices.
>
> 1:1 will also work.

Once the trees are updated?

> > It's something I'd really like to use, but I don't have access to a
> > platform with a hardware iommu. Though I might be able to pick up a
> > replacement board for my new server with the SB750 southbridge which
> > supposedly has AMD's new iommu hardware in it, but I haven't seen any
> > evidence that kvm or linux supports it.
>
> Linux 2.6.27 onwards supports AMD IOMMU. kvm (and device assignment)
> support for AMD IOMMU doesn't exist yet, but work is planned to start soon.

How does the kernel supporting it help a person who wants to use it to pass
through devices to guests if kvm doesn't support it? Also, what would the
kernel use it for in that case?

> > > >> Thomas Fjellstrom wrote:
> > > >>> I'm very interested in being able to pass a few devices through to
> > > >>> kvm guests. I'm wondering what exactly is working now, and how I
> > > >>> can start testing it?
> > > >>>
> > > >>> the latest kvm release doesn't seem to include any support for it
> > > >>> in userspace, so I can't test it with that...
>
> The userspace patch is undergoing pre-merge revisions.
>
> I'll send out an email once I get my git trees synced up to my working
> revisions.
>
> In the meantime, you can use the patches from the list.

It's rather hard to find the patches from before I joined the list; most
archives don't keep attachments.

> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

Thanks for the heads up, and all the work on this stuff :)

-- 
Thomas Fjellstrom
[EMAIL PROTECTED]
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

kvm-76 --std-vga problem

2008-09-28 Thread Michael Malone


Hi Everyone,

I just downloaded and ran the new kvm-76 (upgrading from kvm-74) and I 
have a few issues.
I am running Ubuntu 8.04 (Hardy) as my host and Windows XP as my guest.  
I am using VT-d and the kernel modules loaded correctly.


Here is the command I use to start kvm:
kvm-bridge -hda windows2.img -boot c -m 1000 -serial /dev/ttyUSB0 
-serial file:serial.out -smp 2 -usb -usbdevice "tablet" -full-screen 
-cdrom /dev/cdrom


kvm-bridge is a script that configures my network and passes the other 
options straight through to qemu-system-x86_64


1) what is the difference between running qemu-system-x86_64 and kvm 
directly?

Because, when I run kvm directly, I get this error:
Traceback (most recent call last):
 File "/home/malonem/kvm-76/kvm", line 20, in <module>
   external_module = config.get('shell', 'want_module')
 File "/usr/lib/python2.5/ConfigParser.py", line 511, in get
   raise NoSectionError(section)
ConfigParser.NoSectionError: No section: 'shell'

2) (this has been around for quite some time, I just haven't done 
anything about it)

Even though I start up with -smp 2, windows only reads 1 cpu

3) When I run it using the --std-vga parameter, windows boots to just 
before it gets to the "welcome" screen and hangs.  The output shows a 
multitude of "kvm: get_dirty_pages returned -2"  Is this something to do 
with the kernel version I am using?  I am using the standard Ubuntu 
Hardy Kernel 2.6.24-19-generic.  I read the release notes about the 
kernel version with kvm-76 but didn't really understand them and 
presumed that I am using the kernel modules supplied with kvm. (I 
rmmod'd kvm-intel and kvm before I compiled and installed, then 
modprobe'd the modules afterwards and everything seemed to go ok)


Any help or insight would be much appreciated,

Michael

===
This email, including any attachments, is only for the intended
addressee.  It is subject to copyright, is confidential and may be
the subject of legal or other privilege, none of which is waived or
lost by reason of this transmission.
If the receiver is not the intended addressee, please accept our
apologies, notify us by return, delete all copies and perform no
other act on the email.
Unfortunately, we cannot warrant that the email has not been
altered or corrupted during transmission.
===

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 9/9] x86/iommu: use dma_ops_list in get_dma_ops

2008-09-28 Thread Joerg Roedel

On Sat, Sep 27, 2008 at 03:13:21AM +0300, Muli Ben-Yehuda wrote:
> On Fri, Sep 26, 2008 at 02:32:43PM +0200, Joerg Roedel wrote:
> 
> > Ok, the allocation only matters for dma_alloc_coherent. Fujita
> > introduced a generic software-based dma_alloc_coherent recently
> > which you can use for that. I think implementing PVDMA into an own
> > dma_ops backend and multiplex it using my patches introduces less
> > overhead than an additional layer over the current dma_ops
> > implementation.
> 
> I'm not sure what you have in mind, but I agree with Amit that
> conceptually pvdma should be called after the guest's "native" dma_ops
> have done their thing. This is not just for nommu, consider a guest
> that is using an (emulated) hardware IOMMU, or that wants to use
> swiotlb. We can't replicate their functionality in the pv_dma_ops
> layer, we have to let them run first and then deal with whatever
> we get back.

I have something in mind that I discussed with Amit at the last KVM
forum. The idea was not ready at the event but meanwhile it has matured
a bit.
I think we should try to build a paravirtualized IOMMU for KVM guests.
It should work this way: We reserve a configurable amount of contiguous
guest physical memory and map it dma contiguous using some kind of
hardware IOMMU. This is possible with all hardware IOMMUs we have in the
field by now, also Calgary and GART. The guest does dma_coherent
allocations from this memory directly and is done. For map_single and map_sg
the guest can do bounce buffering. We avoid nearly all pvdma hypercalls
with this approach, keep guest swapping working and solve also the
problems with device dma_masks and guest memory that is not contiguous on
the host side.
For systems without any kind of hardware IOMMU we can extend the
interface to support bounce buffering between host and guest (in this
case we can not avoid the hypercalls). This means that the host
reserves the memory for the DMA transaction (also recognizing the
dma_mask) and copies it from/to the guest directly upon the dma_*_sync
calls.
This is what I have in mind and want to propose. Maybe we can discuss
these ideas here. I think since there are many systems out there with
some kind of hardware IOMMUs (every 64bit AMD processor has a GART) we
should really consider this approach.

> > Another two questions to your approach: What happens if a
> > dma_alloc_coherent allocation crosses page boundaries and the gpa's
> > are not contiguous in host memory? How will dma masks be handled?
> 
> That's a very good question. The host will need to be aware of a
> device's DMA capabilities in order to return I/O addresses (which
> could be hpa's if you don't have an IOMMU) that satisfy them. That's
> quite a pain.

True. And I fear we don't get a simple and clean interface with this
approach.

Joerg

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[ANNOUNCE] kvm-76 release

2008-09-28 Thread Avi Kivity


A fairly significant release.  First the caveat:

  kvm-76 userspace will not work with the kvm modules supplied by Linux 
2.6.24 or below.  Note that you can still use the external module 
provided by this release (or earlier ones) with Linux 2.6.16 and above, 
so this should not affect normal use.  If this confuses you, ignore this 
statement.


Now the goodies:

  Significant performance improvements: the new out-of-sync mode (by 
Marcelo, based on ideas in Xen) reduces the vmexit rate on hosts without 
npt or ept.


  Much improved scsi and usb support, courtesy qemu upstream

  Kernel support for device assignment (userspace part still to come, 
so don't get too excited)


Live migration, which has recently regressed, ought to be fixed as well.

Enjoy,

Changes from kvm-75:
- merge qemu-svn
  - improved usb support
  - vnc improvements
  - pass uuid to guest
  - scsi improvements
- change virtio-blk to guest-endian configuration (Liu Yu)
- add 'call near absolute' to x86 emulator and testsuite (Mohammed Gamal)
- drop support for kernel memory allocation (Glauber Costa)
- avoid fiddling with ISA mappings (Jan Kiszka)
  - improves debugging support
- fix external module build on 2.6.22 hosts
- make kernel and userspace memory slot count agree (Sheng Yang)
- add 'cmp' to x86 emulator testsuite (Mohammed Gamal)
- allow 'make sync' from toplevel directory
- external module compatibility for get_user_pages_fast() api (Jan Kiszka)
- correct mtrr setup with >= 4GB RAM (Alex Williamson)
- fix ia64 boot with acpi enabled (Xiantao Zhang)
- initialize userspace kvm context (Uri Lublin)
- fix ia64 with >= 3GB RAM (Xiantao Zhang)
- avoid allocating translated code buffer when using kvm
- simplify memory registration (Glauber Costa)
- support kvmtrace with external module (Eduardo Habkost)
- ia64 init code simplification (Jes Sorensen)
- ia64 external module compatibility with older kernels (Xiantao Zhang)
- fix live migration due to slot number getting lost (Uri Lublin)
- prevent live migration if a device is missing on the target (Uri Lublin)
- fix include-compat/asm symlinks (Uri Lublin)
- do not execute halted vcpus (Marcelo Tosatti)
- fix global tlb flushes with NPT (Joerg Roedel)
- fix preemption error on real mode emulation
- fix page aging with EPT (Sheng Yang)
- simplify locking around ppc shadow tlb page access (Hollis Blanchard)
- cleanup redundant vmcs read (Jan Kiszka)
- fix 64-bit jmp instruction emulation
- move vmx msr definitions to common code
- better support for instructions that reference the accumulator
  (Guillaume Thouvenin)
- add cmp acc, imm instruction emulation (Guillaume Thouvenin)
- kernel support for device assignment
  (Allen Kay, Amit Shah, Weidong Han, Ben-Ami Yassour)
- switch to using get_user_pages_fast() (Marcelo Tosatti)
  - nice speedup on 2.6.27+ hosts
- build fix for ia64 (Jes Sorensen)
- code cleanup (Harvey Harrison)
- avoid entering guest after a startup IPI (Gleb Natapov)
- simplify 'push reg' instruction emulation (Guillaume Thouvenin)
- fix error handling on vcpu creation failure (Glauber Costa)
- support Tukwila ia64 processors (Xiantao Zhang)
- fix tlb flush when converting a large page to small pages (Marcelo 
Tosatti)

- out-of-sync shadow (Marcelo Tosatti)
- reduce inter-processor interrupt generation when using the PIC
  (Marcelo Tosatti)



Notes:
If you use the modules bundled with kvm-76, you can use any version
of Linux from 2.6.16 upwards.  You may also use kvm-76 userspace with
the kvm modules provided by Linux 2.6.25 or above.  Some features may
only be available in newer releases.

http://kvm.qumranet.com


--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/9][RFC] stackable dma_ops for x86

2008-09-28 Thread Joerg Roedel

On Sun, Sep 28, 2008 at 11:21:26PM +0900, FUJITA Tomonori wrote:
> On Mon, 22 Sep 2008 20:21:12 +0200
> Joerg Roedel <[EMAIL PROTECTED]> wrote:
> 
> > Hi,
> > 
> > this patch series implements stackable dma_ops on x86. This is useful to
> > be able to fall back to a different dma_ops implementation if one can
> > not handle a particular device (as necessary for example with
> > paravirtualized device passthrough or if a hardware IOMMU only handles a
> > subset of available devices).
> 
> We already handle the latter. This patchset is more flexible but
> seems to incur more overheads.
> 
> This feature will be used for only paravirtualized device passthrough?
> If so, I feel that there are simpler (and more specific) solutions for
> it.

It's not only for device passthrough. It also handles the cases where a
hardware IOMMU does not handle all devices in the system (like in some
Calgary systems but also possible with AMD IOMMU). With this patchset we
can handle these cases in a generic way without hacking it into the
hardware drivers (these hacks are also in the AMD IOMMU code and I plan
to remove them in the case this patchset will be accepted).

Joerg

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 9/9] x86/iommu: use dma_ops_list in get_dma_ops

2008-09-28 Thread Joerg Roedel

On Sun, Sep 28, 2008 at 11:21:23PM +0900, FUJITA Tomonori wrote:
> > +struct dma_mapping_ops *find_dma_ops_for_device(struct device *dev)
> > +{
> > +   int i;
> > +   unsigned long flags;
> > +   struct dma_mapping_ops *entry, *ops = NULL;
> > +
> > +   read_lock_irqsave(&dma_ops_list_lock, flags);
> > +
> > +   for (i = 0; i < DMA_OPS_TYPE_MAX; ++i)
> > +   list_for_each_entry(entry, &dma_ops_list[i], list) {
> > +   if (!entry->device_supported)
> > +   continue;
> > +   if (entry->device_supported(dev)) {
> > +   ops = entry;
> > +   goto out;
> > +   }
> > +   }
> > +out:
> > +   read_unlock_irqrestore(&dma_ops_list_lock, flags);
> 
> Hmm, every time we call dma_sg/map_single, we call
> read_lock_irqsave(&dma_ops_list_lock, flags). It's likely that we see
> notable performance drop?

Hmm, we should only call find_dma_ops_for_device() the first time a
dma api call is done (look into get_dma_ops). But I also thought about
how this lock can be avoided. In the real world it should not be
necessary because the dma_ops list is initialized before dma api calls
are done. But since there is now a register function which can be called
it's safer this way. What do you think, are we still safe enough without
this lock?

Joerg

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 11/25] KVM: x86 emulator: fix jmp r/m64 instruction

2008-09-28 Thread Avi Kivity

From: Avi Kivity <[EMAIL PROTECTED]>

jmp r/m64 doesn't require the rex.w prefix to indicate the operand size
is 64 bits.  Set the Stack attribute (even though it doesn't involve the
stack, really) to indicate this.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 0630d21..0c120c4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -288,7 +288,7 @@ static u16 group_table[] = {
[Group5*8] =
DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
SrcMem | ModRM | Stack, 0,
-   SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
+   SrcMem | ModRM | Stack, 0, SrcMem | ModRM | Stack, 0,
[Group7*8] =
0, 0, ModRM | SrcMem, ModRM | SrcMem,
SrcNone | ModRM | DstMem | Mov, 0,
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 12/25] add MAINTAINERS entry for the KVM AMD module

2008-09-28 Thread Avi Kivity

From: Joerg Roedel <[EMAIL PROTECTED]>

Signed-off-by: Joerg Roedel <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 MAINTAINERS |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index cad81a2..232ff4a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2416,6 +2416,13 @@ L:   kvm@vger.kernel.org
 W: http://kvm.qumranet.com
 S: Supported
 
+KERNEL VIRTUAL MACHINE (KVM) FOR AMD-V
+P: Joerg Roedel
+M: [EMAIL PROTECTED]
+L: kvm@vger.kernel.org
+W: http://kvm.qumranet.com
+S: Supported
+
 KERNEL VIRTUAL MACHINE (KVM) FOR POWERPC
 P: Hollis Blanchard
 M: [EMAIL PROTECTED]
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 13/25] x86: Move VMX MSRs to msr-index.h

2008-09-28 Thread Avi Kivity

From: Sheng Yang <[EMAIL PROTECTED]>

They are hardware specific MSRs, and we would use them in virtualization
feature detection later.

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/vmx.h  |   15 ---
 include/asm-x86/msr-index.h |   16 
 2 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 41e8c10..86059f4 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -331,21 +331,6 @@ enum vmcs_field {
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define MSR_IA32_VMX_BASIC  0x480
-#define MSR_IA32_VMX_PINBASED_CTLS  0x481
-#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
-#define MSR_IA32_VMX_EXIT_CTLS  0x483
-#define MSR_IA32_VMX_ENTRY_CTLS 0x484
-#define MSR_IA32_VMX_MISC   0x485
-#define MSR_IA32_VMX_CR0_FIXED0 0x486
-#define MSR_IA32_VMX_CR0_FIXED1 0x487
-#define MSR_IA32_VMX_CR4_FIXED0 0x488
-#define MSR_IA32_VMX_CR4_FIXED1 0x489
-#define MSR_IA32_VMX_VMCS_ENUM  0x48a
-#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
-#define MSR_IA32_VMX_EPT_VPID_CAP   0x48c
-
-#define MSR_IA32_FEATURE_CONTROL 0x3a
 #define IA32_FEATURE_CONTROL_LOCKED_BIT 0x1
 #define IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT 0x4
 
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 44bce77..63632b8 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -176,6 +176,7 @@
 #define MSR_IA32_TSC   0x0010
 #define MSR_IA32_PLATFORM_ID   0x0017
 #define MSR_IA32_EBL_CR_POWERON 0x002a
+#define MSR_IA32_FEATURE_CONTROL 0x003a
 
 #define MSR_IA32_APICBASE  0x001b
 #define MSR_IA32_APICBASE_BSP  (1<<8)
@@ -310,4 +311,19 @@
 /* Geode defined MSRs */
 #define MSR_GEODE_BUSCONT_CONF0 0x1900
 
+/* Intel VT MSRs */
+#define MSR_IA32_VMX_BASIC  0x0480
+#define MSR_IA32_VMX_PINBASED_CTLS  0x0481
+#define MSR_IA32_VMX_PROCBASED_CTLS 0x0482
+#define MSR_IA32_VMX_EXIT_CTLS  0x0483
+#define MSR_IA32_VMX_ENTRY_CTLS 0x0484
+#define MSR_IA32_VMX_MISC   0x0485
+#define MSR_IA32_VMX_CR0_FIXED0 0x0486
+#define MSR_IA32_VMX_CR0_FIXED1 0x0487
+#define MSR_IA32_VMX_CR4_FIXED0 0x0488
+#define MSR_IA32_VMX_CR4_FIXED1 0x0489
+#define MSR_IA32_VMX_VMCS_ENUM  0x048a
+#define MSR_IA32_VMX_PROCBASED_CTLS2 0x048b
+#define MSR_IA32_VMX_EPT_VPID_CAP   0x048c
+
 #endif /* __ASM_MSR_INDEX_H */
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 20/25] KVM: Device Assignment with VT-d

2008-09-28 Thread Avi Kivity

From: Ben-Ami Yassour <[EMAIL PROTECTED]>

Based on a patch by: Kay, Allen M <[EMAIL PROTECTED]>

This patch enables PCI device assignment based on VT-d support.
When a device is assigned to the guest, the guest memory is pinned and
the mapping is updated in the VT-d IOMMU.

[Amit: Expose KVM_CAP_IOMMU so we can check if an IOMMU is present
and also control enable/disable from userspace]

Signed-off-by: Kay, Allen M <[EMAIL PROTECTED]>
Signed-off-by: Weidong Han <[EMAIL PROTECTED]>
Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
Signed-off-by: Amit Shah <[EMAIL PROTECTED]>

Acked-by: Mark Gross <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/Makefile  |3 +
 arch/x86/kvm/vtd.c |  198 
 arch/x86/kvm/x86.c |   14 +++
 include/asm-x86/kvm_host.h |   23 +-
 include/linux/kvm.h|3 +
 include/linux/kvm_host.h   |   52 
 virt/kvm/kvm_main.c|9 ++-
 7 files changed, 281 insertions(+), 21 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c

diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile
index d0e940b..3072b17 100644
--- a/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@ -12,6 +12,9 @@ EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm
 
 kvm-objs := $(common-objs) x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o \
i8254.o
+ifeq ($(CONFIG_DMAR),y)
+kvm-objs += vtd.o
+endif
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/arch/x86/kvm/vtd.c b/arch/x86/kvm/vtd.c
new file mode 100644
index 000..667bf3f
--- /dev/null
+++ b/arch/x86/kvm/vtd.c
@@ -0,0 +1,198 @@
+/*
+ * Copyright (c) 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) 2006-2008 Intel Corporation
+ * Copyright IBM Corporation, 2008
+ * Author: Allen M. Kay <[EMAIL PROTECTED]>
+ * Author: Weidong Han <[EMAIL PROTECTED]>
+ * Author: Ben-Ami Yassour <[EMAIL PROTECTED]>
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+static int kvm_iommu_unmap_memslots(struct kvm *kvm);
+static void kvm_iommu_put_pages(struct kvm *kvm,
+   gfn_t base_gfn, unsigned long npages);
+
+int kvm_iommu_map_pages(struct kvm *kvm,
+   gfn_t base_gfn, unsigned long npages)
+{
+   gfn_t gfn = base_gfn;
+   pfn_t pfn;
+   int i, r;
+   struct dmar_domain *domain = kvm->arch.intel_iommu_domain;
+
+   /* check if iommu exists and in use */
+   if (!domain)
+   return 0;
+
+   r = -EINVAL;
+   for (i = 0; i < npages; i++) {
+   /* check if already mapped */
+   pfn = (pfn_t)intel_iommu_iova_to_pfn(domain,
+gfn_to_gpa(gfn));
+   if (pfn && !is_mmio_pfn(pfn))
+   continue;
+
+   pfn = gfn_to_pfn(kvm, gfn);
+   if (!is_mmio_pfn(pfn)) {
+   r = intel_iommu_page_mapping(domain,
+gfn_to_gpa(gfn),
+pfn_to_hpa(pfn),
+PAGE_SIZE,
+DMA_PTE_READ |
+DMA_PTE_WRITE);
+   if (r) {
+   printk(KERN_DEBUG "kvm_iommu_map_pages:"
+  "iommu failed to map pfn=%lx\n", pfn);
+   goto unmap_pages;
+   }
+   } else {
+   printk(KERN_DEBUG "kvm_iommu_map_page:"
+  "invalid pfn=%lx\n", pfn);
+   goto unmap_pages;
+   }
+   gfn++;
+   }
+   return 0;
+
+unmap_pages:
+   kvm_iommu_put_pages(kvm, base_gfn, i);
+   return r;
+}
+
+static int kvm_iommu_map_memslots(struct kvm *kvm)
+{
+   int i, r;
+
+   down_read(&kvm->slots_lock);
+   for (i = 0; i < kvm->nmemslots; i++) {
+   r = kvm_iommu_map_pages(kvm, kvm->memslots[i].base_gfn,
+   kvm->memslots[i].npages);
+   if (r)
+   break;
+   }
+   up_r

[PATCH 18/25] KVM: x86 emulator: Use DstAcc for 'and'

2008-09-28 Thread Avi Kivity

From: Guillaume Thouvenin <[EMAIL PROTECTED]>

For instruction 'and al,imm' we use DstAcc instead of doing
the emulation directly into the instruction's opcode.

Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |   21 ++---
 1 files changed, 2 insertions(+), 19 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 2b43208..ea05117 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -96,7 +96,7 @@ static u16 opcode_table[256] = {
/* 0x20 - 0x27 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-   SrcImmByte, SrcImm, 0, 0,
+   DstAcc | SrcImmByte, DstAcc | SrcImm, 0, 0,
/* 0x28 - 0x2F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -1392,27 +1392,10 @@ special_insn:
  sbb:  /* sbb */
emulate_2op_SrcV("sbb", c->src, c->dst, ctxt->eflags);
break;
-   case 0x20 ... 0x23:
+   case 0x20 ... 0x25:
  and:  /* and */
emulate_2op_SrcV("and", c->src, c->dst, ctxt->eflags);
break;
-   case 0x24:  /* and al imm8 */
-   c->dst.type = OP_REG;
-   c->dst.ptr = &c->regs[VCPU_REGS_RAX];
-   c->dst.val = *(u8 *)c->dst.ptr;
-   c->dst.bytes = 1;
-   c->dst.orig_val = c->dst.val;
-   goto and;
-   case 0x25:  /* and ax imm16, or eax imm32 */
-   c->dst.type = OP_REG;
-   c->dst.bytes = c->op_bytes;
-   c->dst.ptr = &c->regs[VCPU_REGS_RAX];
-   if (c->op_bytes == 2)
-   c->dst.val = *(u16 *)c->dst.ptr;
-   else
-   c->dst.val = *(u32 *)c->dst.ptr;
-   c->dst.orig_val = c->dst.val;
-   goto and;
case 0x28 ... 0x2d:
  sub:  /* sub */
emulate_2op_SrcV("sub", c->src, c->dst, ctxt->eflags);
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 16/25] KVM: x86 emulator: Add DstAcc operand type

2008-09-28 Thread Avi Kivity

From: Guillaume Thouvenin <[EMAIL PROTECTED]>

Add DstAcc operand type. That means that there are 4 bits now for
DstMask.

"In the good old days cpus would have only one register that was able to
 fully participate in arithmetic operations, typically called A for
 Accumulator.  The x86 retains this tradition by having special, shorter
 encodings for the A register (like the cmp opcode), and even some
 instructions that only operate on A (like mul).

 SrcAcc and DstAcc would accommodate these instructions by decoding A
 into the corresponding 'struct operand'."
  -- Avi Kivity

Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |   50 +--
 1 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 0c120c4..4390ec8 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -47,25 +47,26 @@
 #define ImplicitOps (1<<1) /* Implicit in opcode. No generic decode. */
 #define DstReg  (2<<1) /* Register operand. */
 #define DstMem  (3<<1) /* Memory operand. */
-#define DstMask (3<<1)
+#define DstAcc  (4<<1)  /* Destination Accumulator */
+#define DstMask (7<<1)
 /* Source operand type. */
-#define SrcNone (0<<3) /* No source operand. */
-#define SrcImplicit (0<<3) /* Source operand is implicit in the opcode. */
-#define SrcReg  (1<<3) /* Register operand. */
-#define SrcMem  (2<<3) /* Memory operand. */
-#define SrcMem16    (3<<3) /* Memory operand (16-bit). */
-#define SrcMem32    (4<<3) /* Memory operand (32-bit). */
-#define SrcImm  (5<<3) /* Immediate operand. */
-#define SrcImmByte  (6<<3) /* 8-bit sign-extended immediate operand. */
-#define SrcMask (7<<3)
+#define SrcNone (0<<4) /* No source operand. */
+#define SrcImplicit (0<<4) /* Source operand is implicit in the opcode. */
+#define SrcReg  (1<<4) /* Register operand. */
+#define SrcMem  (2<<4) /* Memory operand. */
+#define SrcMem16    (3<<4) /* Memory operand (16-bit). */
+#define SrcMem32    (4<<4) /* Memory operand (32-bit). */
+#define SrcImm  (5<<4) /* Immediate operand. */
+#define SrcImmByte  (6<<4) /* 8-bit sign-extended immediate operand. */
+#define SrcMask (7<<4)
 /* Generic ModRM decode. */
-#define ModRM   (1<<6)
+#define ModRM   (1<<7)
 /* Destination is only written; never read. */
-#define Mov (1<<7)
-#define BitOp   (1<<8)
-#define MemAbs  (1<<9)  /* Memory operand is absolute displacement */
-#define String  (1<<10) /* String instruction (rep capable) */
-#define Stack   (1<<11) /* Stack instruction (push/pop) */
+#define Mov (1<<8)
+#define BitOp   (1<<9)
+#define MemAbs  (1<<10)  /* Memory operand is absolute displacement */
+#define String  (1<<12) /* String instruction (rep capable) */
+#define Stack   (1<<13) /* Stack instruction (push/pop) */
 #define Group   (1<<14) /* Bits 3:5 of modrm byte extend opcode */
 #define GroupDual   (1<<15) /* Alternate decoding of mod == 3 */
 #define GroupMask   0xff    /* Group number stored in bits 0:7 */
@@ -1060,6 +1061,23 @@ done_prefixes:
}
c->dst.type = OP_MEM;
break;
+   case DstAcc:
+   c->dst.type = OP_REG;
+   c->dst.bytes = c->op_bytes;
+   c->dst.ptr = &c->regs[VCPU_REGS_RAX];
+   switch (c->op_bytes) {
+   case 1:
+   c->dst.val = *(u8 *)c->dst.ptr;
+   break;
+   case 2:
+   c->dst.val = *(u16 *)c->dst.ptr;
+   break;
+   case 4:
+   c->dst.val = *(u32 *)c->dst.ptr;
+   break;
+   }
+   c->dst.orig_val = c->dst.val;
+   break;
}
 
if (c->rip_relative)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 17/25] KVM: x86 emulator: Add cmp al, imm and cmp ax, imm instructions (opcodes 3c, 3d)

2008-09-28 Thread Avi Kivity

From: Guillaume Thouvenin <[EMAIL PROTECTED]>

Add decode entries for these opcodes; execution is already implemented.

Signed-off-by: Guillaume Thouvenin <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |3 ++-
 1 files changed, 2 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 4390ec8..2b43208 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -108,7 +108,8 @@ static u16 opcode_table[256] = {
/* 0x38 - 0x3F */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
-   0, 0, 0, 0,
+   ByteOp | DstAcc | SrcImm, DstAcc | SrcImm,
+   0, 0,
/* 0x40 - 0x47 */
DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x48 - 0x4F */
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 19/25] VT-d: Changes to support KVM

2008-09-28 Thread Avi Kivity

From: Kay, Allen M <[EMAIL PROTECTED]>

This patch extends the VT-d driver to support KVM

[Ben: fixed memory pinning]

Signed-off-by: Kay, Allen M <[EMAIL PROTECTED]>
Signed-off-by: Weidong Han <[EMAIL PROTECTED]>
Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
Signed-off-by: Amit Shah <[EMAIL PROTECTED]>

Acked-by: Mark Gross <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/pci/dmar.c   |4 +-
 drivers/pci/intel-iommu.c|  116 +-
 drivers/pci/iova.c   |2 +-
 {drivers/pci => include/linux}/intel-iommu.h |   11 +++
 {drivers/pci => include/linux}/iova.h|0
 5 files changed, 126 insertions(+), 7 deletions(-)
 rename {drivers/pci => include/linux}/intel-iommu.h (94%)
 rename {drivers/pci => include/linux}/iova.h (100%)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae..1df28ea 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -26,8 +26,8 @@
 
 #include 
 #include 
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 
 #undef PREFIX
 #define PREFIX "DMAR:"
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index c3edcdc..089ba3f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -33,8 +33,8 @@
 #include 
 #include 
 #include 
-#include "iova.h"
-#include "intel-iommu.h"
+#include <linux/iova.h>
+#include <linux/intel-iommu.h>
 #include  /* force_iommu in this header in x86-64*/
 #include 
 #include 
@@ -160,7 +160,7 @@ static inline void *alloc_domain_mem(void)
return iommu_kmem_cache_alloc(iommu_domain_cache);
 }
 
-static inline void free_domain_mem(void *vaddr)
+static void free_domain_mem(void *vaddr)
 {
kmem_cache_free(iommu_domain_cache, vaddr);
 }
@@ -1414,7 +1414,7 @@ static void domain_remove_dev_info(struct dmar_domain 
*domain)
  * find_domain
  * Note: we use struct pci_dev->dev.archdata.iommu stores the info
  */
-struct dmar_domain *
+static struct dmar_domain *
 find_domain(struct pci_dev *pdev)
 {
struct device_domain_info *info;
@@ -2453,3 +2453,111 @@ int __init intel_iommu_init(void)
return 0;
 }
 
+void intel_iommu_domain_exit(struct dmar_domain *domain)
+{
+   u64 end;
+
+   /* Domain 0 is reserved, so dont process it */
+   if (!domain)
+   return;
+
+   end = DOMAIN_MAX_ADDR(domain->gaw);
+   end = end & (~PAGE_MASK_4K);
+
+   /* clear ptes */
+   dma_pte_clear_range(domain, 0, end);
+
+   /* free page tables */
+   dma_pte_free_pagetable(domain, 0, end);
+
+   iommu_free_domain(domain);
+   free_domain_mem(domain);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_exit);
+
+struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev)
+{
+   struct dmar_drhd_unit *drhd;
+   struct dmar_domain *domain;
+   struct intel_iommu *iommu;
+
+   drhd = dmar_find_matched_drhd_unit(pdev);
+   if (!drhd) {
+   printk(KERN_ERR "intel_iommu_domain_alloc: drhd == NULL\n");
+   return NULL;
+   }
+
+   iommu = drhd->iommu;
+   if (!iommu) {
+   printk(KERN_ERR
+   "intel_iommu_domain_alloc: iommu == NULL\n");
+   return NULL;
+   }
+   domain = iommu_alloc_domain(iommu);
+   if (!domain) {
+   printk(KERN_ERR
+   "intel_iommu_domain_alloc: domain == NULL\n");
+   return NULL;
+   }
+   if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) {
+   printk(KERN_ERR
+   "intel_iommu_domain_alloc: domain_init() failed\n");
+   intel_iommu_domain_exit(domain);
+   return NULL;
+   }
+   return domain;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc);
+
+int intel_iommu_context_mapping(
+   struct dmar_domain *domain, struct pci_dev *pdev)
+{
+   int rc;
+   rc = domain_context_mapping(domain, pdev);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_context_mapping);
+
+int intel_iommu_page_mapping(
+   struct dmar_domain *domain, dma_addr_t iova,
+   u64 hpa, size_t size, int prot)
+{
+   int rc;
+   rc = domain_page_mapping(domain, iova, hpa, size, prot);
+   return rc;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_page_mapping);
+
+void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn)
+{
+   detach_domain_for_dev(domain, bus, devfn);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_detach_dev);
+
+struct dmar_domain *
+intel_iommu_find_domain(struct pci_dev *pdev)
+{
+   return find_domain(pdev);
+}
+EXPORT_SYMBOL_GPL(intel_iommu_find_domain);
+
+int intel_iommu_found(void)
+{
+   return g_num_of_iommus;
+}
+EXPORT_SYMBOL_GPL(intel_iommu_found);
+
+u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova)
+{
+   struct dma_pte *pte;
+   u64 pfn;
+
+   pfn = 0;
+   pte = addr_to_dma_pte(domain, iova);
+
+   if (pte)
+   pfn = dma_pte_add

[PATCH 23/25] KVM: switch to get_user_pages_fast

2008-09-28 Thread Avi Kivity

From: Marcelo Tosatti <[EMAIL PROTECTED]>

Convert gfn_to_pfn to use get_user_pages_fast, which can do lockless
pagetable lookups on x86. Kernel compilation on 4-way guest is 3.7%
faster on VMX.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/powerpc/kvm/44x_tlb.c |2 --
 arch/x86/kvm/mmu.c |   23 +--
 arch/x86/kvm/paging_tmpl.h |8 +---
 arch/x86/kvm/vmx.c |4 
 arch/x86/kvm/x86.c |6 --
 virt/kvm/kvm_main.c|   10 +-
 6 files changed, 15 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 7b11fd7..2e227a4 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -147,9 +147,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, 
gfn_t gfn, u64 asid,
stlbe = &vcpu->arch.shadow_tlb[victim];
 
/* Get reference to new page. */
-   down_read(&current->mm->mmap_sem);
new_page = gfn_to_page(vcpu->kvm, gfn);
-   up_read(&current->mm->mmap_sem);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
kvm_release_page_clean(new_page);
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index bce3e25..5779a23 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -405,16 +405,19 @@ static int host_largepage_backed(struct kvm *kvm, gfn_t 
gfn)
 {
struct vm_area_struct *vma;
unsigned long addr;
+   int ret = 0;
 
addr = gfn_to_hva(kvm, gfn);
if (kvm_is_error_hva(addr))
-   return 0;
+   return ret;
 
+   down_read(&current->mm->mmap_sem);
vma = find_vma(current->mm, addr);
if (vma && is_vm_hugetlb_page(vma))
-   return 1;
+   ret = 1;
+   up_read(&current->mm->mmap_sem);
 
-   return 0;
+   return ret;
 }
 
 static int is_largepage_backed(struct kvm_vcpu *vcpu, gfn_t large_gfn)
@@ -1140,9 +1143,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
if (gpa == UNMAPPED_GVA)
return NULL;
 
-   down_read(&current->mm->mmap_sem);
page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-   up_read(&current->mm->mmap_sem);
 
return page;
 }
@@ -1330,16 +1331,14 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t 
v, int write, gfn_t gfn)
pfn_t pfn;
unsigned long mmu_seq;
 
-   down_read(&current->mm->mmap_sem);
if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
largepage = 1;
}
 
mmu_seq = vcpu->kvm->mmu_notifier_seq;
-   /* implicit mb(), we'll read before PT lock is unlocked */
+   smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);
-   up_read(&current->mm->mmap_sem);
 
/* mmio */
if (is_error_pfn(pfn)) {
@@ -1488,15 +1487,13 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t 
gpa,
if (r)
return r;
 
-   down_read(&current->mm->mmap_sem);
if (is_largepage_backed(vcpu, gfn & ~(KVM_PAGES_PER_HPAGE-1))) {
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
largepage = 1;
}
mmu_seq = vcpu->kvm->mmu_notifier_seq;
-   /* implicit mb(), we'll read before PT lock is unlocked */
+   smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);
-   up_read(&current->mm->mmap_sem);
if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
return 1;
@@ -1809,15 +1806,13 @@ static void mmu_guess_page_from_pte_write(struct 
kvm_vcpu *vcpu, gpa_t gpa,
return;
gfn = (gpte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
 
-   down_read(&current->mm->mmap_sem);
if (is_large_pte(gpte) && is_largepage_backed(vcpu, gfn)) {
gfn &= ~(KVM_PAGES_PER_HPAGE-1);
vcpu->arch.update_pte.largepage = 1;
}
vcpu->arch.update_pte.mmu_seq = vcpu->kvm->mmu_notifier_seq;
-   /* implicit mb(), we'll read before PT lock is unlocked */
+   smp_rmb();
pfn = gfn_to_pfn(vcpu->kvm, gfn);
-   up_read(&current->mm->mmap_sem);
 
if (is_error_pfn(pfn)) {
kvm_release_pfn_clean(pfn);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b671f61..6dd08e0 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -102,14 +102,10 @@ static bool FNAME(cmpxchg_gpte)(struct kvm *kvm,
pt_element_t *table;
struct page *page;
 
-   down_read(&current->mm->mmap_sem);
page = gfn_to_page(kvm, table_gfn);
-   up_read(&current->mm->mmap_sem);
 
table = kmap_atomic(page, KM_USER0);
-
ret = CMPXCHG(&table[index], orig_pte, new_pte);
-
kunmap_atomic(table, KM_USER0);
 
kvm_release_page_dirty(page);
@@ -418,7 +414,6 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t 
addr,

[PATCH 25/25] MAINTAINERS: Update Avi Kivity's email address

2008-09-28 Thread Avi Kivity

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 MAINTAINERS |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 232ff4a..6f1f241 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2411,7 +2411,7 @@ S:Supported
 
 KERNEL VIRTUAL MACHINE (KVM)
 P: Avi Kivity
-M: [EMAIL PROTECTED]
+M: [EMAIL PROTECTED]
 L: kvm@vger.kernel.org
 W: http://kvm.qumranet.com
 S: Supported
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 24/25] KVM: x86.c make kvm_load_realmode_segment static

2008-09-28 Thread Avi Kivity

From: Harvey Harrison <[EMAIL PROTECTED]>

Noticed by sparse:
arch/x86/kvm/x86.c:3591:5: warning: symbol 'kvm_load_realmode_segment' was not 
declared. Should it be static?

Signed-off-by: Harvey Harrison <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 108f072..1b738cb 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3611,7 +3611,7 @@ static int load_segment_descriptor_to_kvm_desct(struct 
kvm_vcpu *vcpu,
return 0;
 }
 
-int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int seg)
+static int kvm_load_realmode_segment(struct kvm_vcpu *vcpu, u16 selector, int 
seg)
 {
struct kvm_segment segvar = {
.base = selector << 4,
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 22/25] KVM: opencode gfn_to_page in kvm_vm_fault

2008-09-28 Thread Avi Kivity

From: Marcelo Tosatti <[EMAIL PROTECTED]>

kvm_vm_fault is invoked with mmap_sem held in read mode. Since gfn_to_page
will be converted to get_user_pages_fast, which requires this lock NOT
to be held, switch to opencoded get_user_pages.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 virt/kvm/kvm_main.c |   19 ---
 1 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index f42d5c2..2907d05 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -1394,17 +1394,22 @@ out:
 
 static int kvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 {
+   struct page *page[1];
+   unsigned long addr;
+   int npages;
+   gfn_t gfn = vmf->pgoff;
struct kvm *kvm = vma->vm_file->private_data;
-   struct page *page;
 
-   if (!kvm_is_visible_gfn(kvm, vmf->pgoff))
+   addr = gfn_to_hva(kvm, gfn);
+   if (kvm_is_error_hva(addr))
return VM_FAULT_SIGBUS;
-   page = gfn_to_page(kvm, vmf->pgoff);
-   if (is_error_page(page)) {
-   kvm_release_page_clean(page);
+
+   npages = get_user_pages(current, current->mm, addr, 1, 1, 0, page,
+   NULL);
+   if (unlikely(npages != 1))
return VM_FAULT_SIGBUS;
-   }
-   vmf->page = page;
+
+   vmf->page = page[0];
return 0;
 }
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 21/25] KVM: Device Assignment: Free device structures if IRQ allocation fails

2008-09-28 Thread Avi Kivity

From: Amit Shah <[EMAIL PROTECTED]>

When an IRQ allocation fails, we free up the device structures and
disable the device so that we can unregister the device in the
userspace and not expose it to the guest at all.

Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |   86 +++-
 1 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index c8a2793..61eddbe 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -166,6 +166,43 @@ static void kvm_assigned_dev_ack_irq(struct 
kvm_irq_ack_notifier *kian)
enable_irq(dev->host_irq);
 }
 
+static void kvm_free_assigned_device(struct kvm *kvm,
+struct kvm_assigned_dev_kernel
+*assigned_dev)
+{
+   if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested)
+   free_irq(assigned_dev->host_irq, (void *)assigned_dev);
+
+   kvm_unregister_irq_ack_notifier(kvm, &assigned_dev->ack_notifier);
+
+   if (cancel_work_sync(&assigned_dev->interrupt_work))
+   /* We had pending work. That means we will have to take
+* care of kvm_put_kvm.
+*/
+   kvm_put_kvm(kvm);
+
+   pci_release_regions(assigned_dev->dev);
+   pci_disable_device(assigned_dev->dev);
+   pci_dev_put(assigned_dev->dev);
+
+   list_del(&assigned_dev->list);
+   kfree(assigned_dev);
+}
+
+static void kvm_free_all_assigned_devices(struct kvm *kvm)
+{
+   struct list_head *ptr, *ptr2;
+   struct kvm_assigned_dev_kernel *assigned_dev;
+
+   list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
+   assigned_dev = list_entry(ptr,
+ struct kvm_assigned_dev_kernel,
+ list);
+
+   kvm_free_assigned_device(kvm, assigned_dev);
+   }
+}
+
 static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
   struct kvm_assigned_irq
   *assigned_irq)
@@ -194,8 +231,8 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 
if (irqchip_in_kernel(kvm)) {
if (!capable(CAP_SYS_RAWIO)) {
-   return -EPERM;
-   goto out;
+   r = -EPERM;
+   goto out_release;
}
 
if (assigned_irq->host_irq)
@@ -214,17 +251,18 @@ static int kvm_vm_ioctl_assign_irq(struct kvm *kvm,
 */
if (request_irq(match->host_irq, kvm_assigned_dev_intr, 0,
"kvm_assigned_device", (void *)match)) {
-   printk(KERN_INFO "%s: couldn't allocate irq for pv "
-  "device\n", __func__);
r = -EIO;
-   goto out;
+   goto out_release;
}
}
 
match->irq_requested = true;
-out:
mutex_unlock(&kvm->lock);
return r;
+out_release:
+   mutex_unlock(&kvm->lock);
+   kvm_free_assigned_device(kvm, match);
+   return r;
 }
 
 static int kvm_vm_ioctl_assign_device(struct kvm *kvm,
@@ -300,40 +338,6 @@ out_free:
return r;
 }
 
-static void kvm_free_assigned_devices(struct kvm *kvm)
-{
-   struct list_head *ptr, *ptr2;
-   struct kvm_assigned_dev_kernel *assigned_dev;
-
-   list_for_each_safe(ptr, ptr2, &kvm->arch.assigned_dev_head) {
-   assigned_dev = list_entry(ptr,
- struct kvm_assigned_dev_kernel,
- list);
-
-   if (irqchip_in_kernel(kvm) && assigned_dev->irq_requested) {
-   free_irq(assigned_dev->host_irq,
-(void *)assigned_dev);
-
-   kvm_unregister_irq_ack_notifier(kvm,
-   &assigned_dev->
-   ack_notifier);
-   }
-
-   if (cancel_work_sync(&assigned_dev->interrupt_work))
-   /* We had pending work. That means we will have to take
-* care of kvm_put_kvm.
-*/
-   kvm_put_kvm(kvm);
-
-   pci_release_regions(assigned_dev->dev);
-   pci_disable_device(assigned_dev->dev);
-   pci_dev_put(assigned_dev->dev);
-
-   list_del(&assigned_dev->list);
-   kfree(assigned_dev);
-   }
-}
-
 unsigned long segment_base(u16 selector)
 {
struct descriptor_table gdt;
@@ -4296,7 +4300,7 @@ static void kvm_free_vcpus(struct kvm *kvm)
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
kvm_iommu_unmap_guest(kvm);
-   kvm_free_assigned_devices(kv

[PATCH 14/25] KVM: VMX: Rename IA32_FEATURE_CONTROL bits

2008-09-28 Thread Avi Kivity

From: Sheng Yang <[EMAIL PROTECTED]>

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/vmx.c |   18 +-
 arch/x86/kvm/vmx.h |4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f8e615f..046a91b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -1041,9 +1041,9 @@ static __init int vmx_disabled_by_bios(void)
u64 msr;
 
rdmsrl(MSR_IA32_FEATURE_CONTROL, msr);
-   return (msr & (IA32_FEATURE_CONTROL_LOCKED_BIT |
-  IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT))
-   == IA32_FEATURE_CONTROL_LOCKED_BIT;
+   return (msr & (FEATURE_CONTROL_LOCKED |
+  FEATURE_CONTROL_VMXON_ENABLED))
+   == FEATURE_CONTROL_LOCKED;
/* locked but not enabled */
 }
 
@@ -1055,14 +1055,14 @@ static void hardware_enable(void *garbage)
 
INIT_LIST_HEAD(&per_cpu(vcpus_on_cpu, cpu));
rdmsrl(MSR_IA32_FEATURE_CONTROL, old);
-   if ((old & (IA32_FEATURE_CONTROL_LOCKED_BIT |
-   IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT))
-   != (IA32_FEATURE_CONTROL_LOCKED_BIT |
-   IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT))
+   if ((old & (FEATURE_CONTROL_LOCKED |
+   FEATURE_CONTROL_VMXON_ENABLED))
+   != (FEATURE_CONTROL_LOCKED |
+   FEATURE_CONTROL_VMXON_ENABLED))
/* enable and lock */
wrmsrl(MSR_IA32_FEATURE_CONTROL, old |
-  IA32_FEATURE_CONTROL_LOCKED_BIT |
-  IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT);
+  FEATURE_CONTROL_LOCKED |
+  FEATURE_CONTROL_VMXON_ENABLED);
write_cr4(read_cr4() | X86_CR4_VMXE); /* FIXME: not cpu hotplug safe */
asm volatile (ASM_VMX_VMXON_RAX
  : : "a"(&phys_addr), "m"(phys_addr)
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 86059f4..44cfab7 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -331,8 +331,8 @@ enum vmcs_field {
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define IA32_FEATURE_CONTROL_LOCKED_BIT0x1
-#define IA32_FEATURE_CONTROL_VMXON_ENABLED_BIT 0x4
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED  (1<<2)
 
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT   9
 #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 08/25] KVM: x86: unhalt vcpu0 on reset

2008-09-28 Thread Avi Kivity

From: Marcelo Tosatti <[EMAIL PROTECTED]>

Since "KVM: x86: do not execute halted vcpus", HLT by vcpu0 before system
reset by the IO thread will hang the guest.

Mark vcpu as runnable in such case.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index bf98d40..2134f3e 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3959,6 +3959,12 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
kvm_set_segment(vcpu, &sregs->tr, VCPU_SREG_TR);
kvm_set_segment(vcpu, &sregs->ldt, VCPU_SREG_LDTR);
 
+   /* Older userspace won't unhalt the vcpu on reset. */
+   if (vcpu->vcpu_id == 0 && kvm_rip_read(vcpu) == 0xfff0 &&
+   sregs->cs.selector == 0xf000 && sregs->cs.base == 0xffff0000 &&
+   !(vcpu->arch.cr0 & X86_CR0_PE))
+   vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
vcpu_put(vcpu);
 
return 0;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 05/25] KVM: x86 emulator: Add in/out instructions (opcodes 0xe4-0xe7, 0xec-0xef)

2008-09-28 Thread Avi Kivity

From: Mohammed Gamal <[EMAIL PROTECTED]>

The patch adds in/out instructions to the x86 emulator.

The instruction was encountered while running the BIOS while using
the invalid guest state emulation patch.

Signed-off-by: Mohammed Gamal <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |   35 +--
 1 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 944f1f4..3ac2f14 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -177,11 +177,14 @@ static u16 opcode_table[256] = {
/* 0xD8 - 0xDF */
0, 0, 0, 0, 0, 0, 0, 0,
/* 0xE0 - 0xE7 */
-   0, 0, 0, 0, 0, 0, 0, 0,
+   0, 0, 0, 0,
+   SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+   SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xE8 - 0xEF */
ImplicitOps | Stack, SrcImm | ImplicitOps,
ImplicitOps, SrcImmByte | ImplicitOps,
-   0, 0, 0, 0,
+   SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
+   SrcNone | ByteOp | ImplicitOps, SrcNone | ImplicitOps,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
@@ -1259,6 +1262,8 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
u64 msr_data;
unsigned long saved_eip = 0;
struct decode_cache *c = &ctxt->decode;
+   unsigned int port;
+   int io_dir_in;
int rc = 0;
 
/* Shadow copy of register state. Committed on successful emulation.
@@ -1687,6 +1692,16 @@ special_insn:
c->src.val = c->regs[VCPU_REGS_RCX];
emulate_grp2(ctxt);
break;
+   case 0xe4:  /* inb */
+   case 0xe5:  /* in */
+   port = insn_fetch(u8, 1, c->eip);
+   io_dir_in = 1;
+   goto do_io;
+   case 0xe6: /* outb */
+   case 0xe7: /* out */
+   port = insn_fetch(u8, 1, c->eip);
+   io_dir_in = 0;
+   goto do_io;
case 0xe8: /* call (near) */ {
long int rel;
switch (c->op_bytes) {
@@ -1737,6 +1752,22 @@ special_insn:
jmp_rel(c, c->src.val);
c->dst.type = OP_NONE; /* Disable writeback. */
break;
+   case 0xec: /* in al,dx */
+   case 0xed: /* in (e/r)ax,dx */
+   port = c->regs[VCPU_REGS_RDX];
+   io_dir_in = 1;
+   goto do_io;
+   case 0xee: /* out al,dx */
+   case 0xef: /* out (e/r)ax,dx */
+   port = c->regs[VCPU_REGS_RDX];
+   io_dir_in = 0;
+   do_io:  if (kvm_emulate_pio(ctxt->vcpu, NULL, io_dir_in,
+  (c->d & ByteOp) ? 1 : c->op_bytes,
+  port) != 0) {
+   c->eip = saved_eip;
+   goto cannot_emulate;
+   }
+   return 0;
case 0xf4:  /* hlt */
ctxt->vcpu->arch.halt_request = 1;
break;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 15/25] x86: Move FEATURE_CONTROL bits to msr-index.h

2008-09-28 Thread Avi Kivity

From: Sheng Yang <[EMAIL PROTECTED]>

MSR_IA32_FEATURE_CONTROL is already defined there.

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/vmx.h  |3 ---
 include/asm-x86/msr-index.h |3 +++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 44cfab7..3e010d2 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -331,9 +331,6 @@ enum vmcs_field {
 
 #define AR_RESERVD_MASK 0xfffe0f00
 
-#define FEATURE_CONTROL_LOCKED (1<<0)
-#define FEATURE_CONTROL_VMXON_ENABLED  (1<<2)
-
 #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT   9
 #define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT 10
 
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 63632b8..715f951 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -178,6 +178,9 @@
 #define MSR_IA32_EBL_CR_POWERON0x002a
 #define MSR_IA32_FEATURE_CONTROL0x003a
 
+#define FEATURE_CONTROL_LOCKED (1<<0)
+#define FEATURE_CONTROL_VMXON_ENABLED  (1<<2)
+
 #define MSR_IA32_APICBASE  0x001b
 #define MSR_IA32_APICBASE_BSP  (1<<8)
 #define MSR_IA32_APICBASE_ENABLE   (1<<11)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 10/25] KVM: VMX: Cleanup stalled INTR_INFO read

2008-09-28 Thread Avi Kivity

From: Jan Kiszka <[EMAIL PROTECTED]>

Commit 1c0f4f5011829dac96347b5f84ba37c2252e1e08 left a useless access
of VM_ENTRY_INTR_INFO_FIELD in vmx_intr_assist behind. Clean this up.

Signed-off-by: Jan Kiszka <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/vmx.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index e7e8c86..f8e615f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3135,11 +3135,8 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx)
 
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
-   u32 intr_info_field;
-
update_tpr_threshold(vcpu);
 
-   intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
if (cpu_has_virtual_nmis()) {
if (vcpu->arch.nmi_pending && !vcpu->arch.nmi_injected) {
if (vmx_nmi_enabled(vcpu)) {
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 09/25] KVM: ppc: kvmppc_44x_shadow_release() does not require mmap_sem to be locked

2008-09-28 Thread Avi Kivity

From: Hollis Blanchard <[EMAIL PROTECTED]>

And it gets in the way of get_user_pages_fast().

Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/powerpc/kvm/44x_tlb.c |8 +---
 1 files changed, 1 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c
index 3594bbd..7b11fd7 100644
--- a/arch/powerpc/kvm/44x_tlb.c
+++ b/arch/powerpc/kvm/44x_tlb.c
@@ -110,7 +110,6 @@ static int kvmppc_44x_tlbe_is_writable(struct tlbe *tlbe)
return tlbe->word2 & (PPC44x_TLB_SW|PPC44x_TLB_UW);
 }
 
-/* Must be called with mmap_sem locked for writing. */
 static void kvmppc_44x_shadow_release(struct kvm_vcpu *vcpu,
   unsigned int index)
 {
@@ -150,17 +149,16 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, 
gfn_t gfn, u64 asid,
/* Get reference to new page. */
down_read(&current->mm->mmap_sem);
new_page = gfn_to_page(vcpu->kvm, gfn);
+   up_read(&current->mm->mmap_sem);
if (is_error_page(new_page)) {
printk(KERN_ERR "Couldn't get guest page for gfn %lx!\n", gfn);
kvm_release_page_clean(new_page);
-   up_read(&current->mm->mmap_sem);
return;
}
hpaddr = page_to_phys(new_page);
 
/* Drop reference to old page. */
kvmppc_44x_shadow_release(vcpu, victim);
-   up_read(&current->mm->mmap_sem);
 
vcpu->arch.shadow_pages[victim] = new_page;
 
@@ -194,7 +192,6 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t 
eaddr,
int i;
 
/* XXX Replace loop with fancy data structures. */
-   down_write(&current->mm->mmap_sem);
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
unsigned int tid;
@@ -219,7 +216,6 @@ void kvmppc_mmu_invalidate(struct kvm_vcpu *vcpu, gva_t 
eaddr,
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
}
-   up_write(&current->mm->mmap_sem);
 }
 
 /* Invalidate all mappings on the privilege switch after PID has been changed.
@@ -231,7 +227,6 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int 
usermode)
 
if (vcpu->arch.swap_pid) {
/* XXX Replace loop with fancy data structures. */
-   down_write(&current->mm->mmap_sem);
for (i = 0; i <= tlb_44x_hwater; i++) {
struct tlbe *stlbe = &vcpu->arch.shadow_tlb[i];
 
@@ -243,7 +238,6 @@ void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int 
usermode)
stlbe->tid, stlbe->word0, stlbe->word1,
stlbe->word2, handler);
}
-   up_write(&current->mm->mmap_sem);
vcpu->arch.swap_pid = 0;
}
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 07/25] KVM: x86 emulator: Add call near absolute instruction (opcode 0xff/2)

2008-09-28 Thread Avi Kivity

From: Mohammed Gamal <[EMAIL PROTECTED]>

Add call near absolute instruction.

Signed-off-by: Mohammed Gamal <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |   11 ++-
 1 files changed, 10 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 3ac2f14..0630d21 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -286,7 +286,8 @@ static u16 group_table[] = {
ByteOp | DstMem | SrcNone | ModRM, ByteOp | DstMem | SrcNone | ModRM,
0, 0, 0, 0, 0, 0,
[Group5*8] =
-   DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM, 0, 0,
+   DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
+   SrcMem | ModRM | Stack, 0,
SrcMem | ModRM, 0, SrcMem | ModRM | Stack, 0,
[Group7*8] =
0, 0, ModRM | SrcMem, ModRM | SrcMem,
@@ -1162,6 +1163,14 @@ static inline int emulate_grp45(struct x86_emulate_ctxt 
*ctxt,
case 1: /* dec */
emulate_1op("dec", c->dst, ctxt->eflags);
break;
+   case 2: /* call near abs */ {
+   long int old_eip;
+   old_eip = c->eip;
+   c->eip = c->src.val;
+   c->src.val = old_eip;
+   emulate_push(ctxt);
+   break;
+   }
case 4: /* jmp abs */
c->eip = c->src.val;
break;
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 06/25] KVM: x86: do not execute halted vcpus

2008-09-28 Thread Avi Kivity

From: Marcelo Tosatti <[EMAIL PROTECTED]>

Offline or uninitialized vcpu's can be executed if requested to perform
userspace work.

Follow Avi's suggestion to handle halted vcpu's in the main loop,
simplifying kvm_emulate_halt(). Introduce a new vcpu->requests bit to
indicate events that promote state from halted to running.

Also standardize vcpu wake sites.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/i8254.c |5 +-
 arch/x86/kvm/lapic.c |   16 ++--
 arch/x86/kvm/x86.c   |  100 +-
 include/linux/kvm_host.h |1 +
 virt/kvm/kvm_main.c  |   10 ++--
 5 files changed, 67 insertions(+), 65 deletions(-)

diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
index 4cb4430..634132a 100644
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -200,10 +200,9 @@ static int __pit_timer_fn(struct kvm_kpit_state *ps)
 
if (!atomic_inc_and_test(&pt->pending))
set_bit(KVM_REQ_PENDING_TIMER, &vcpu0->requests);
-   if (vcpu0 && waitqueue_active(&vcpu0->wq)) {
-   vcpu0->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+
+   if (vcpu0 && waitqueue_active(&vcpu0->wq))
wake_up_interruptible(&vcpu0->wq);
-   }
 
pt->timer.expires = ktime_add_ns(pt->timer.expires, pt->period);
pt->scheduled = ktime_to_ns(pt->timer.expires);
diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c
index be94f93..fd00f69 100644
--- a/arch/x86/kvm/lapic.c
+++ b/arch/x86/kvm/lapic.c
@@ -339,13 +339,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
} else
apic_clear_vector(vector, apic->regs + APIC_TMR);
 
-   if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
-   kvm_vcpu_kick(vcpu);
-   else if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED) {
-   vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-   if (waitqueue_active(&vcpu->wq))
-   wake_up_interruptible(&vcpu->wq);
-   }
+   kvm_vcpu_kick(vcpu);
 
result = (orig_irr == 0);
break;
@@ -384,8 +378,7 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) {
vcpu->arch.sipi_vector = vector;
vcpu->arch.mp_state = KVM_MP_STATE_SIPI_RECEIVED;
-   if (waitqueue_active(&vcpu->wq))
-   wake_up_interruptible(&vcpu->wq);
+   kvm_vcpu_kick(vcpu);
}
break;
 
@@ -950,10 +943,9 @@ static int __apic_timer_fn(struct kvm_lapic *apic)
 
if(!atomic_inc_and_test(&apic->timer.pending))
set_bit(KVM_REQ_PENDING_TIMER, &apic->vcpu->requests);
-   if (waitqueue_active(q)) {
-   apic->vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
+   if (waitqueue_active(q))
wake_up_interruptible(q);
-   }
+
if (apic_lvtt_period(apic)) {
result = 1;
apic->timer.dev.expires = ktime_add_ns(
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 3f3cb71..bf98d40 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2798,11 +2798,6 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu)
KVMTRACE_0D(HLT, vcpu, handler);
if (irqchip_in_kernel(vcpu->kvm)) {
vcpu->arch.mp_state = KVM_MP_STATE_HALTED;
-   up_read(&vcpu->kvm->slots_lock);
-   kvm_vcpu_block(vcpu);
-   down_read(&vcpu->kvm->slots_lock);
-   if (vcpu->arch.mp_state != KVM_MP_STATE_RUNNABLE)
-   return -EINTR;
return 1;
} else {
vcpu->run->exit_reason = KVM_EXIT_HLT;
@@ -3097,24 +3092,10 @@ static void vapic_exit(struct kvm_vcpu *vcpu)
up_read(&vcpu->kvm->slots_lock);
 }
 
-static int __vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+static int vcpu_enter_guest(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
int r;
 
-   if (unlikely(vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED)) {
-   pr_debug("vcpu %d received sipi with vector # %x\n",
-  vcpu->vcpu_id, vcpu->arch.sipi_vector);
-   kvm_lapic_reset(vcpu);
-   r = kvm_x86_ops->vcpu_reset(vcpu);
-   if (r)
-   return r;
-   vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
-   }
-
-   down_read(&vcpu->kvm->slots_lock);
-   vapic_enter(vcpu);
-
-again:
if (vcpu->requests)
if (test_and_clear_bit(KVM_REQ_MMU_RELOAD, &vcpu->requests))
kvm_mmu_unload(vcpu);
@@ -3151,22 +3132,13 @@ again:
 
local_irq_disable();
 
-   if (vcpu->requests

[PATCH 04/25] KVM: Add statistics for guest irq injections

2008-09-28 Thread Avi Kivity

From: Avi Kivity <[EMAIL PROTECTED]>

These can help show whether a guest is making progress or not.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/svm.c |1 +
 arch/x86/kvm/vmx.c |1 +
 arch/x86/kvm/x86.c |1 +
 include/asm-x86/kvm_host.h |1 +
 4 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 6022888..9b54550 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1519,6 +1519,7 @@ static inline void svm_inject_irq(struct vcpu_svm *svm, 
int irq)
 
KVMTRACE_1D(INJ_VIRQ, &svm->vcpu, (u32)irq, handler);
 
+   ++svm->vcpu.stat.irq_injections;
control = &svm->vmcb->control;
control->int_vector = irq;
control->int_ctl &= ~V_INTR_PRIO_MASK;
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 71e57ae..e7e8c86 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2341,6 +2341,7 @@ static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 
KVMTRACE_1D(INJ_VIRQ, vcpu, (u32)irq, handler);
 
+   ++vcpu->stat.irq_injections;
if (vcpu->arch.rmode.active) {
vmx->rmode.irq.pending = true;
vmx->rmode.irq.vector = irq;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index e3b8966..3f3cb71 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -92,6 +92,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
{ "fpu_reload", VCPU_STAT(fpu_reload) },
{ "insn_emulation", VCPU_STAT(insn_emulation) },
{ "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) },
+   { "irq_injections", VCPU_STAT(irq_injections) },
{ "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) },
{ "mmu_pte_write", VM_STAT(mmu_pte_write) },
{ "mmu_pte_updated", VM_STAT(mmu_pte_updated) },
diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
index 982b6b2..815efc3 100644
--- a/include/asm-x86/kvm_host.h
+++ b/include/asm-x86/kvm_host.h
@@ -413,6 +413,7 @@ struct kvm_vcpu_stat {
u32 insn_emulation;
u32 insn_emulation_fail;
u32 hypercalls;
+   u32 irq_injections;
 };
 
 struct descriptor_table {
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 03/25] KVM: MMU: Modify kvm_shadow_walk.entry to accept u64 addr

2008-09-28 Thread Avi Kivity

From: Sheng Yang <[EMAIL PROTECTED]>

EPT is 4 level by default in 32pae (48 bits), but the addr parameter
of kvm_shadow_walk->entry() only accepts unsigned long as the virtual
address, which is 32 bits in 32pae. This results in SHADOW_PT_INDEX()
overflowing when trying to fetch the level 4 index.

Fix it by extending kvm_shadow_walk->entry() to accept a 64-bit addr
parameter.

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/mmu.c |   10 +-
 arch/x86/kvm/paging_tmpl.h |4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 866d713..bce3e25 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -144,7 +144,7 @@ struct kvm_rmap_desc {
 
 struct kvm_shadow_walk {
int (*entry)(struct kvm_shadow_walk *walk, struct kvm_vcpu *vcpu,
-gva_t addr, u64 *spte, int level);
+u64 addr, u64 *spte, int level);
 };
 
 static struct kmem_cache *pte_chain_cache;
@@ -941,7 +941,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
 }
 
 static int walk_shadow(struct kvm_shadow_walk *walker,
-  struct kvm_vcpu *vcpu, gva_t addr)
+  struct kvm_vcpu *vcpu, u64 addr)
 {
hpa_t shadow_addr;
int level;
@@ -1270,7 +1270,7 @@ struct direct_shadow_walk {
 
 static int direct_map_entry(struct kvm_shadow_walk *_walk,
struct kvm_vcpu *vcpu,
-   gva_t addr, u64 *sptep, int level)
+   u64 addr, u64 *sptep, int level)
 {
struct direct_shadow_walk *walk =
container_of(_walk, struct direct_shadow_walk, walker);
@@ -1289,7 +1289,7 @@ static int direct_map_entry(struct kvm_shadow_walk *_walk,
 
if (*sptep == shadow_trap_nonpresent_pte) {
pseudo_gfn = (addr & PT64_DIR_BASE_ADDR_MASK) >> PAGE_SHIFT;
-   sp = kvm_mmu_get_page(vcpu, pseudo_gfn, addr, level - 1,
+   sp = kvm_mmu_get_page(vcpu, pseudo_gfn, (gva_t)addr, level - 1,
  1, ACC_ALL, sptep);
if (!sp) {
pgprintk("nonpaging_map: ENOMEM\n");
@@ -1317,7 +1317,7 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, 
int write,
.pt_write = 0,
};
 
-   r = walk_shadow(&walker.walker, vcpu, (gva_t)gfn << PAGE_SHIFT);
+   r = walk_shadow(&walker.walker, vcpu, gfn << PAGE_SHIFT);
if (r < 0)
return r;
return walker.pt_write;
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index b7064e1..b671f61 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -286,7 +286,7 @@ static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct 
kvm_mmu_page *page,
  * Fetch a shadow pte for a specific level in the paging hierarchy.
  */
 static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk *_sw,
-   struct kvm_vcpu *vcpu, gva_t addr,
+   struct kvm_vcpu *vcpu, u64 addr,
u64 *sptep, int level)
 {
struct shadow_walker *sw =
@@ -326,7 +326,7 @@ static int FNAME(shadow_walk_entry)(struct kvm_shadow_walk 
*_sw,
metaphysical = 0;
table_gfn = gw->table_gfn[level - 2];
}
-   shadow_page = kvm_mmu_get_page(vcpu, table_gfn, addr, level-1,
+   shadow_page = kvm_mmu_get_page(vcpu, table_gfn, (gva_t)addr, level-1,
   metaphysical, access, sptep);
if (!metaphysical) {
r = kvm_read_guest_atomic(vcpu->kvm, gw->pte_gpa[level - 2],
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 02/25] KVM: ia64: Implement kvm_arch_vcpu_ioctl_{set,get}_mpstate

2008-09-28 Thread Avi Kivity

From: Xiantao Zhang <[EMAIL PROTECTED]>

Two ioctl arch functions are added to set vcpu's smp state.

Signed-off-by: Xiantao Zhang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/ia64/kvm/kvm-ia64.c |   37 +++--
 1 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index cd0d1a7..7ad759e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -179,6 +179,7 @@ int kvm_dev_ioctl_check_extension(long ext)
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_USER_MEMORY:
+   case KVM_CAP_MP_STATE:
 
r = 1;
break;
@@ -1789,11 +1790,43 @@ int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
 {
-   return -EINVAL;
+   vcpu_load(vcpu);
+   mp_state->mp_state = vcpu->arch.mp_state;
+   vcpu_put(vcpu);
+   return 0;
+}
+
+static int vcpu_reset(struct kvm_vcpu *vcpu)
+{
+   int r;
+   long psr;
+   local_irq_save(psr);
+   r = kvm_insert_vmm_mapping(vcpu);
+   if (r)
+   goto fail;
+
+   vcpu->arch.launched = 0;
+   kvm_arch_vcpu_uninit(vcpu);
+   r = kvm_arch_vcpu_init(vcpu);
+   if (r)
+   goto fail;
+
+   kvm_purge_vmm_mapping(vcpu);
+   r = 0;
+fail:
+   local_irq_restore(psr);
+   return r;
 }
 
 int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu,
struct kvm_mp_state *mp_state)
 {
-   return -EINVAL;
+   int r = 0;
+
+   vcpu_load(vcpu);
+   vcpu->arch.mp_state = mp_state->mp_state;
+   if (vcpu->arch.mp_state == KVM_MP_STATE_UNINITIALIZED)
+   r = vcpu_reset(vcpu);
+   vcpu_put(vcpu);
+   return r;
 }
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 00/25] KVM Updates for 2.6.28 merge window (part 3 of 3)

2008-09-28 Thread Avi Kivity

Here is the third and last batch of the KVM updates for the 2.6.28 merge window.

Linux 2.6.28 KVM will introduce support for pci device assignment and will
improve overall emulation accuracy.

Amit Shah (1):
  KVM: Device Assignment: Free device structures if IRQ allocation
fails

Avi Kivity (3):
  KVM: Add statistics for guest irq injections
  KVM: x86 emulator: fix jmp r/m64 instruction
  MAINTAINERS: Update Avi Kivity's email address

Ben-Ami Yassour (1):
  KVM: Device Assignment with VT-d

Guillaume Thouvenin (3):
  KVM: x86 emulator: Add DstAcc operand type
  KVM: x86 emulator: Add cmp al, imm and cmp ax, imm instructions
(ocodes 3c, 3d)
  KVM: x86 emulator: Use DstAcc for 'and'

Harvey Harrison (1):
  KVM: x86.c make kvm_load_realmode_segment static

Hollis Blanchard (1):
  KVM: ppc: kvmppc_44x_shadow_release() does not require mmap_sem to be
locked

Jan Kiszka (1):
  KVM: VMX: Cleanup stalled INTR_INFO read

Joerg Roedel (1):
  add MAINTAINERS entry for the KVM AMD module

Kay, Allen M (1):
  VT-d: Changes to support KVM

Marcelo Tosatti (4):
  KVM: x86: do not execute halted vcpus
  KVM: x86: unhalt vcpu0 on reset
  KVM: opencode gfn_to_page in kvm_vm_fault
  KVM: switch to get_user_pages_fast

Mohammed Gamal (3):
  KVM: x86 emulator: Add std and cld instructions (opcodes 0xfc-0xfd)
  KVM: x86 emulator: Add in/out instructions (opcodes 0xe4-0xe7,
0xec-0xef)
  KVM: x86 emulator: Add call near absolute instruction (opcode 0xff/2)

Sheng Yang (4):
  KVM: MMU: Modify kvm_shadow_walk.entry to accept u64 addr
  x86: Move VMX MSRs to msr-index.h
  KVM: VMX: Rename IA32_FEATURE_CONTROL bits
  x86: Move FEATURE_CONTROL bits to msr-index.h

Xiantao Zhang (1):
  KVM: ia64: Implement kvm_arch_vcpu_ioctl_{set,get}_mpstate

 MAINTAINERS  |9 +-
 arch/ia64/kvm/kvm-ia64.c |   37 -
 arch/powerpc/kvm/44x_tlb.c   |8 -
 arch/x86/kvm/Makefile|3 +
 arch/x86/kvm/i8254.c |5 +-
 arch/x86/kvm/lapic.c |   16 +--
 arch/x86/kvm/mmu.c   |   33 ++---
 arch/x86/kvm/paging_tmpl.h   |   12 +-
 arch/x86/kvm/svm.c   |1 +
 arch/x86/kvm/vmx.c   |   26 ++--
 arch/x86/kvm/vmx.h   |   18 --
 arch/x86/kvm/vtd.c   |  198 
 arch/x86/kvm/x86.c   |  215 +++---
 arch/x86/kvm/x86_emulate.c   |  132 +++-
 drivers/pci/dmar.c   |4 +-
 drivers/pci/intel-iommu.c|  116 +-
 drivers/pci/iova.c   |2 +-
 include/asm-x86/kvm_host.h   |   24 +---
 include/asm-x86/msr-index.h  |   19 +++
 {drivers/pci => include/linux}/intel-iommu.h |   11 ++
 {drivers/pci => include/linux}/iova.h|0
 include/linux/kvm.h  |3 +
 include/linux/kvm_host.h |   53 +++
 virt/kvm/kvm_main.c  |   48 --
 24 files changed, 726 insertions(+), 267 deletions(-)
 create mode 100644 arch/x86/kvm/vtd.c
 rename {drivers/pci => include/linux}/intel-iommu.h (94%)
 rename {drivers/pci => include/linux}/iova.h (100%)

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 01/25] KVM: x86 emulator: Add std and cld instructions (opcodes 0xfc-0xfd)

2008-09-28 Thread Avi Kivity

From: Mohammed Gamal <[EMAIL PROTECTED]>

This adds the std and cld instructions to the emulator.

Encountered while running the BIOS with invalid guest
state emulation enabled.

Signed-off-by: Mohammed Gamal <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kvm/x86_emulate.c |   10 +-
 1 files changed, 9 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
index 66e0bd6..944f1f4 100644
--- a/arch/x86/kvm/x86_emulate.c
+++ b/arch/x86/kvm/x86_emulate.c
@@ -187,7 +187,7 @@ static u16 opcode_table[256] = {
ImplicitOps, ImplicitOps, Group | Group3_Byte, Group | Group3,
/* 0xF8 - 0xFF */
ImplicitOps, 0, ImplicitOps, ImplicitOps,
-   0, 0, Group | Group4, Group | Group5,
+   ImplicitOps, ImplicitOps, Group | Group4, Group | Group5,
 };
 
 static u16 twobyte_table[256] = {
@@ -1762,6 +1762,14 @@ special_insn:
ctxt->eflags |= X86_EFLAGS_IF;
c->dst.type = OP_NONE;  /* Disable writeback. */
break;
+   case 0xfc: /* cld */
+   ctxt->eflags &= ~EFLG_DF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
+   case 0xfd: /* std */
+   ctxt->eflags |= EFLG_DF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
case 0xfe ... 0xff: /* Grp4/Grp5 */
rc = emulate_grp45(ctxt, ops);
if (rc != 0)
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [kvm] Re: [PATCH 0/5] bios: >4G updates

2008-09-28 Thread Avi Kivity


Alex Williamson wrote:

I guess the SSDT support was prior to the last merge, and appropriately
dropped.  The most interesting new feature is a boot menu to allow the
user to override the boot device.  That seems pretty useful.  Other
things like better printing of the devices and support for PIIX4 could
come in handy too.  So yeah, it looks worth merging.  I can drop my
first two patches and rework the others so we don't cause unnecessary
merge problems.
  


I tried to merge it, but the new bios won't boot.  I traced this to 
ata_reset: it toggles the soft reset bit in the command register and 
expects to see the busy bit set, but the ide device model returns zero 
status if the selected device has no drive.


No idea if the device model or the bios is wrong.

If anyone (hi Gleb) wants to take a look, it's in the branch bios-merge 
on git.kernel.org.


btw, commenting this out leads to boot failure as well, but that may be 
related.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] kernel: Makefile: header-link: remove include-compat/asm before resymlinking it

2008-09-28 Thread Avi Kivity


Uri Lublin wrote:

Similar to include/asm.
Otherwise, if already exists, the new symlink is created under the old linked
directory.
  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] migration: loadvm_state: if a device idstr/instance not found fail the migration

2008-09-28 Thread Avi Kivity


Uri Lublin wrote:

Instead of just printing an error message.
  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] kernel: Makefile: header-link: remove include-compat/asm before resymlinking it

2008-09-28 Thread Uri Lublin

Similar to include/asm.
Otherwise, if already exists, the new symlink is created under the old linked
directory.

Signed-off-by: Uri Lublin <[EMAIL PROTECTED]>
---
 kernel/Makefile |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/kernel/Makefile b/kernel/Makefile
index ec2d055..d77eb77 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -44,7 +44,7 @@ all:: header-link prerequisite
 sync: header-sync source-sync header-link
 
 header-link:
-   rm -f include/asm
+   rm -f include/asm include-compat/asm
ln -sf asm-$(ARCH_DIR) include/asm
ln -sf asm-$(ARCH_DIR) include-compat/asm
 
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: libkvm: kvm_dirty_pages_log_change: do not forget to set .slot

2008-09-28 Thread Avi Kivity


Uri Lublin wrote:

The default for .slot is 0, which is wrong for all other kvm-slots.

This fixes enabling/disabling kvm dirty page logging.

  


Applied, thanks.

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] migration: loadvm_state: if a device idstr/instance not found fail the migration

2008-09-28 Thread Uri Lublin

Instead of just printing an error message.

Signed-off-by: Uri Lublin <[EMAIL PROTECTED]>
---
 qemu/vl.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/qemu/vl.c b/qemu/vl.c
index b34a114..9a4a4ec 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -7117,6 +7117,8 @@ int qemu_live_loadvm_state(QEMUFile *f)
 if (!se) {
 fprintf(stderr, "qemu: warning: instance 0x%x of device '%s' not 
present in current VM\n", 
 instance_id, idstr);
+ret = -1;
+goto the_end;
 } else {
 if (version_id > se->version_id) { /* src version > dst version */
 fprintf(stderr, "migration:version mismatch:%s:%d(s)>%d(d)\n",
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

libkvm: kvm_dirty_pages_log_change: do not forget to set .slot

2008-09-28 Thread Uri Lublin

The default for .slot is 0, which is wrong for all other kvm-slots.

This fixes enabling/disabling kvm dirty page logging.

Signed-off-by: Uri Lublin <[EMAIL PROTECTED]>
---
 libkvm/libkvm.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index 88d3f5d..dfa63bb 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -172,6 +172,7 @@ static int kvm_dirty_pages_log_change(kvm_context_t kvm, 
unsigned long phys_addr
 
{
struct kvm_userspace_memory_region mem = {
+   .slot = slot,
.memory_size = slots[slot].len,
.guest_phys_addr = slots[slot].phys_addr,
.userspace_addr = slots[slot].userspace_addr,
-- 
1.6.0.1

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/9][RFC] stackable dma_ops for x86

2008-09-28 Thread FUJITA Tomonori

On Mon, 22 Sep 2008 20:21:12 +0200
Joerg Roedel <[EMAIL PROTECTED]> wrote:

> Hi,
> 
> this patch series implements stackable dma_ops on x86. This is useful to
> be able to fall back to a different dma_ops implementation if one can
> not handle a particular device (as necessary for example with
> paravirtualized device passthrough or if a hardware IOMMU only handles a
> subset of available devices).

We already handle the latter. This patchset is more flexible but
seems to incur more overheads.

Will this feature be used only for paravirtualized device passthrough?
If so, I feel that there are simpler (and more specific) solutions for
it.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 9/9] x86/iommu: use dma_ops_list in get_dma_ops

2008-09-28 Thread FUJITA Tomonori

On Mon, 22 Sep 2008 20:21:21 +0200
Joerg Roedel <[EMAIL PROTECTED]> wrote:

> This patch enables stackable dma_ops on x86. To do this, it also enables
> the per-device dma_ops on i386.
> 
> Signed-off-by: Joerg Roedel <[EMAIL PROTECTED]>
> ---
>  arch/x86/kernel/pci-dma.c |   26 ++
>  include/asm-x86/device.h  |6 +++---
>  include/asm-x86/dma-mapping.h |   14 +++---
>  3 files changed, 36 insertions(+), 10 deletions(-)
> 
> diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
> index b990fb6..2e517c2 100644
> --- a/arch/x86/kernel/pci-dma.c
> +++ b/arch/x86/kernel/pci-dma.c
> @@ -82,6 +82,32 @@ void x86_register_dma_ops(struct dma_mapping_ops *ops,
>   write_unlock_irqrestore(&dma_ops_list_lock, flags);
>  }
>  
> +struct dma_mapping_ops *find_dma_ops_for_device(struct device *dev)
> +{
> + int i;
> + unsigned long flags;
> + struct dma_mapping_ops *entry, *ops = NULL;
> +
> + read_lock_irqsave(&dma_ops_list_lock, flags);
> +
> + for (i = 0; i < DMA_OPS_TYPE_MAX; ++i)
> + list_for_each_entry(entry, &dma_ops_list[i], list) {
> + if (!entry->device_supported)
> + continue;
> + if (entry->device_supported(dev)) {
> + ops = entry;
> + goto out;
> + }
> + }
> +out:
> + read_unlock_irqrestore(&dma_ops_list_lock, flags);

Hmm, every time we call dma_sg/map_single, we call
read_lock_irqsave(&dma_ops_list_lock, flags). Isn't it likely that we
will see a notable performance drop?
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Support multiple device assignment to one guest

2008-09-28 Thread Muli Ben-Yehuda

On Sun, Sep 28, 2008 at 02:34:19PM +0800, Han, Weidong wrote:

> I don't understand why this approach reduces IOTLB utility. How to
> say unrelated devices with unrelated buffers competing for the same
> resource?  Multiple devices shares one page table should improve
> IOTLB utility, because some entries in IOTLB can be used for all of
> them.

You are assuming that there will be sharing of frames between
devices. I am assuming that there won't be to any significant
degree. If my assumption is correct, then each device will benefit
from having a full IOTLB for itself.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
  xxx
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Support multiple device assignment to one guest

2008-09-28 Thread Muli Ben-Yehuda

On Sun, Sep 28, 2008 at 02:43:26PM +0800, Han, Weidong wrote:

> For pvdma, why each BDF will need its own domain for intra-guest
> protection?

Because if more than one BDF shares an address space, BDFa will be
able to DMA to BDFb's buffers, and vice versa. The point of
intra-guest (not inter-guest) protection is that a device (driver) can
only DMA to its own buffers.

Cheers,
Muli
-- 
The First Workshop on I/O Virtualization (WIOV '08)
Dec 2008, San Diego, CA, http://www.usenix.org/wiov08/
  xxx
SYSTOR 2009---The Israeli Experimental Systems Conference
http://www.haifa.il.ibm.com/conferences/systor2009/
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: kvm > 61 segfaults when started with a bridged tap

2008-09-28 Thread iMil



Hi list,

some months ago, I submitted this issue: 
http://www.mail-archive.com/[EMAIL PROTECTED]/msg16688.html


Recently, I "solved" the problem, here's how :

The backtrace of the generated core dump showed the following :

Program terminated with signal 11, Segmentation fault.
[New process 11204]
#0  0xb7d36e66 in ?? () from /lib/tls/i686/cmov/libc.so.6
(gdb) bt
#0  0xb7d36e66 in ?? () from /lib/tls/i686/cmov/libc.so.6
#1  0xb7d38edd in ?? () from /lib/tls/i686/cmov/libc.so.6
#2  0xb7d3a9ff in calloc () from /lib/tls/i686/cmov/libc.so.6
#3  0xb7715131 in xcb_connect_to_fd () from /usr/lib/libxcb.so.1
#4  0xb7717a51 in xcb_connect () from /usr/lib/libxcb.so.1
#5  0xb784b717 in _XConnectXCB () from /usr/lib/libX11.so.6
#6  0xb7834029 in XOpenDisplay () from /usr/lib/libX11.so.6
#7  0xb7e8986e in ?? () from /usr/lib/libSDL-1.2.so.0
#8  0xb7e94dec in ?? () from /usr/lib/libSDL-1.2.so.0
#9  0xb7e7fbf6 in SDL_VideoInit () from /usr/lib/libSDL-1.2.so.0
#10 0xb7e535c3 in SDL_InitSubSystem () from /usr/lib/libSDL-1.2.so.0
#11 0xb7e53627 in SDL_Init () from /usr/lib/libSDL-1.2.so.0
#12 0x080c2d46 in sdl_display_init (ds=0xb7eb8354, full_screen=0, 
no_frame=0)

at sdl.c:635
#13 0x0805985e in main (argc=11, argv=0xbfc88474)
at /home/imil/src/kvm-75/qemu/vl.c:10011

considering that the SDL initialization led to a crash, I started 
QEMU/KVM with the -nographic flag... and it worked. At this point, I 
noticed that :


. Starting QEMU/KVM with the -net nic option (without specifying a model) 
+ default SDL output : Segfault
. Starting QEMU/KVM without -net nic + default SDL output :  everything's 
fine except, obviously, the network
. Starting QEMU/KVM with the -net nic option along with -nographic : 
everything's fine except I don't have SDL output (didn't try VNC)


Then I tried to specify a NIC model (model=i82557b) and everything just 
worked, this is my fix.


As far as I can tell, the following configuration is buggy (for me at 
least) :


. Ubuntu Hardy (8.04), 2.6.24-19-generic
. QEMU/KVM > 61 (till I figured out how to solve this issue, I used KVM > 
61 with QEMU/KVM 61)

. Default rtl8139 NIC
. SDL output

Googling showed me only one person that seems to have the same issue under 
Debian (http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=476469), but on 
another machine of mine running Debian Lenny, I never had this problem.


Hope this helps,

regards


Emile "iMil" Heitor <[EMAIL PROTECTED]>  _
   http://gcu-squad.orgASCII ribbon campaign ( )
- against HTML email  X
& vCards / \
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 00/14] Fix & enhance NMI support for KVM - v4

2008-09-28 Thread Gleb Natapov

On Sun, Sep 28, 2008 at 12:45:38PM +0300, Avi Kivity wrote:
> [EMAIL PROTECTED] wrote:
>> Version 4 of this series includes the following fixes:
>>
>>  - Handle NMI task gates (Gleb Natapov)
>>  - Clear internal NMI states on VCPU reset (Gleb Natapov)
>>  - Typo fixes and minor cleanups (Sheng Yang and /me)
>>
>> Hope we are now reaching a mergable state.
>>   
>
> Looks very good to me.  Gleb, can you review this as well?
>
Looks good to me too. And works on CPU without virtual NMI for my test
case.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 04/14] KVM: x86: Reset pending/inject NMI state on CPU reset

2008-09-28 Thread Gleb Natapov

On Fri, Sep 26, 2008 at 09:30:48AM +0200, [EMAIL PROTECTED] wrote:
> CPU reset invalidates pending or already injected NMIs, therefore reset
> the related state variables.
> 
> Based on original patch by Gleb Natapov.
> 
Signed-off-by: Gleb Natapov <[EMAIL PROTECTED]>

> Signed-off-by: Jan Kiszka <[EMAIL PROTECTED]>
> ---
>  arch/x86/kvm/x86.c |3 +++
>  1 file changed, 3 insertions(+)
> 
> Index: b/arch/x86/kvm/x86.c
> ===
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4173,6 +4173,9 @@ void kvm_arch_vcpu_destroy(struct kvm_vc
>  
>  int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
>  {
> + vcpu->arch.nmi_pending = false;
> + vcpu->arch.nmi_injected = false;
> +
>   return kvm_x86_ops->vcpu_reset(vcpu);
>  }
>  

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 03/14] KVM: VMX: Support for NMI task gates

2008-09-28 Thread Gleb Natapov

On Fri, Sep 26, 2008 at 09:30:47AM +0200, [EMAIL PROTECTED] wrote:
> Properly set GUEST_INTR_STATE_NMI and reset nmi_injected when a
> task-switch vmexit happened due to a task gate being used for handling
> NMIs. Also avoid the false warning about valid vectoring info in
> kvm_handle_exit.
> 
> Based on original patch by Gleb Natapov.
> 
Signed-off-by: Gleb Natapov <[EMAIL PROTECTED]>

> Signed-off-by: Jan Kiszka <[EMAIL PROTECTED]>
> ---
>  arch/x86/kvm/vmx.c |   18 +++---
>  1 file changed, 15 insertions(+), 3 deletions(-)
> 
> Index: b/arch/x86/kvm/vmx.c
> ===
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -2819,6 +2819,7 @@ static int handle_apic_access(struct kvm
>  
>  static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
>  {
> + struct vcpu_vmx *vmx = to_vmx(vcpu);
>   unsigned long exit_qualification;
>   u16 tss_selector;
>   int reason;
> @@ -2826,6 +2827,15 @@ static int handle_task_switch(struct kvm
>   exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
>  
>   reason = (u32)exit_qualification >> 30;
> + if (reason == TASK_SWITCH_GATE && vmx->vcpu.arch.nmi_injected &&
> + (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
> + (vmx->idt_vectoring_info & VECTORING_INFO_TYPE_MASK)
> + == INTR_TYPE_NMI_INTR) {
> + vcpu->arch.nmi_injected = false;
> + if (cpu_has_virtual_nmis())
> + vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
> +   GUEST_INTR_STATE_NMI);
> + }
>   tss_selector = exit_qualification;
>  
>   return kvm_task_switch(vcpu, tss_selector, reason);
> @@ -2998,9 +3008,11 @@ static int kvm_handle_exit(struct kvm_ru
>  
>   if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
>   (exit_reason != EXIT_REASON_EXCEPTION_NMI &&
> - exit_reason != EXIT_REASON_EPT_VIOLATION))
> - printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
> -"exit reason is 0x%x\n", __func__, exit_reason);
> + exit_reason != EXIT_REASON_EPT_VIOLATION &&
> + exit_reason != EXIT_REASON_TASK_SWITCH))
> + printk(KERN_WARNING "%s: unexpected, valid vectoring info "
> +"(0x%x) and exit reason is 0x%x\n",
> +__func__, vectoring_info, exit_reason);
>   if (exit_reason < kvm_vmx_max_exit_handlers
>   && kvm_vmx_exit_handlers[exit_reason])
>   return kvm_vmx_exit_handlers[exit_reason](vcpu, kvm_run);

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 00/14] Fix & enhance NMI support for KVM - v4

2008-09-28 Thread Gleb Natapov

On Sun, Sep 28, 2008 at 12:45:38PM +0300, Avi Kivity wrote:
> [EMAIL PROTECTED] wrote:
>> Version 4 of this series includes the following fixes:
>>
>>  - Handle NMI task gates (Gleb Natapov)
>>  - Clear internal NMI states on VCPU reset (Gleb Natapov)
>>  - Typo fixes and minor cleanups (Sheng Yang and /me)
>>
>> Hope we are now reaching a mergeable state.
>>   
>
> Looks very good to me.  Gleb, can you review this as well?
>
Looking into it right now.

--
Gleb.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 08/14] KVM: x86: Enable NMI Watchdog via in-kernel PIT source

2008-09-28 Thread Jan Kiszka

Avi Kivity wrote:
> [EMAIL PROTECTED] wrote:
>> LINT0 of the LAPIC can be used to route PIT events as NMI watchdog ticks
>> into the guest. This patch aligns the in-kernel irqchip emulation with
>> the user space irqchip, which already supports this feature. The trick is
>> to route PIT interrupts to all LAPIC's LVT0 lines.
>>
>> Rebased and slightly polished patch originally posted by Sheng Yang.
>>
>> Signed-off-by: Jan Kiszka <[EMAIL PROTECTED]>
>> Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
>> ---
>>  arch/x86/kvm/i8254.c |   15 +++
>>  arch/x86/kvm/irq.h   |1 +
>>  arch/x86/kvm/lapic.c |   34 +-
>>  3 files changed, 45 insertions(+), 5 deletions(-)
>>
>> Index: b/arch/x86/kvm/i8254.c
>> ===
>> --- a/arch/x86/kvm/i8254.c
>> +++ b/arch/x86/kvm/i8254.c
>> @@ -594,10 +594,25 @@ void kvm_free_pit(struct kvm *kvm)
>>  
>>  static void __inject_pit_timer_intr(struct kvm *kvm)
>>  {
>> +struct kvm_vcpu *vcpu;
>> +int i;
>> +
>>  mutex_lock(&kvm->lock);
>>  kvm_set_irq(kvm, 0, 1);
>>  kvm_set_irq(kvm, 0, 0);
>>  mutex_unlock(&kvm->lock);
>> +
>> +/*
>> + * Provides NMI watchdog support in IOAPIC mode.
>> + * The route is: PIT -> PIC -> LVT0 in NMI mode,
>> + * timer IRQs will continue to flow through the IOAPIC.
>> + */
>> +for (i = 0; i < KVM_MAX_VCPUS; ++i) {
>> +vcpu = kvm->vcpus[i];
>> +if (!vcpu)
>> +continue;
>> +kvm_apic_local_deliver(vcpu, APIC_LVT0);
>> +}
>>  }
>>   
> 
> It would be better to gate this on a variable which is set only if this
> is actually necessary (e.g., does any vcpu have LVT0 set to NMI mode).
> Otherwise, we touch all vcpus up to 1000 times a second.  As this is an
> optimization, it can be done later.

Good idea, will cook an add-on patch.

Jan



signature.asc
Description: OpenPGP digital signature

Re: [PATCH 00/14] Fix & enhance NMI support for KVM - v4

2008-09-28 Thread Jan Kiszka

Avi Kivity wrote:
> [EMAIL PROTECTED] wrote:
>> Version 4 of this series includes the following fixes:
>>
>>  - Handle NMI task gates (Gleb Natapov)
>>  - Clear internal NMI states on VCPU reset (Gleb Natapov)
>>  - Typo fixes and minor cleanups (Sheng Yang and /me)
>>
>> Hope we are now reaching a mergeable state.
>>   
> 
> Looks very good to me.  Gleb, can you review this as well?
> 
> Jan, please update your configuration to include the full name in
> addition to the email address in the From: header (no need to resend
> just for this).

Sorry, realized this mess after it already went out. Script was fixed
meanwhile.

Jan



signature.asc
Description: OpenPGP digital signature

Re: [PATCH 00/14] Fix & enhance NMI support for KVM - v4

2008-09-28 Thread Avi Kivity


[EMAIL PROTECTED] wrote:

Version 4 of this series includes the following fixes:

 - Handle NMI task gates (Gleb Natapov)
 - Clear internal NMI states on VCPU reset (Gleb Natapov)
 - Typo fixes and minor cleanups (Sheng Yang and /me)

Hope we are now reaching a mergeable state.
  


Looks very good to me.  Gleb, can you review this as well?

Jan, please update your configuration to include the full name in 
addition to the email address in the From: header (no need to resend 
just for this).


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 08/14] KVM: x86: Enable NMI Watchdog via in-kernel PIT source

2008-09-28 Thread Avi Kivity


[EMAIL PROTECTED] wrote:

LINT0 of the LAPIC can be used to route PIT events as NMI watchdog ticks
into the guest. This patch aligns the in-kernel irqchip emulation with
the user space irqchip, which already supports this feature. The trick is
to route PIT interrupts to all LAPIC's LVT0 lines.

Rebased and slightly polished patch originally posted by Sheng Yang.

Signed-off-by: Jan Kiszka <[EMAIL PROTECTED]>
Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
---
 arch/x86/kvm/i8254.c |   15 +++
 arch/x86/kvm/irq.h   |1 +
 arch/x86/kvm/lapic.c |   34 +-
 3 files changed, 45 insertions(+), 5 deletions(-)

Index: b/arch/x86/kvm/i8254.c
===
--- a/arch/x86/kvm/i8254.c
+++ b/arch/x86/kvm/i8254.c
@@ -594,10 +594,25 @@ void kvm_free_pit(struct kvm *kvm)
 
 static void __inject_pit_timer_intr(struct kvm *kvm)

 {
+   struct kvm_vcpu *vcpu;
+   int i;
+
mutex_lock(&kvm->lock);
kvm_set_irq(kvm, 0, 1);
kvm_set_irq(kvm, 0, 0);
mutex_unlock(&kvm->lock);
+
+   /*
+* Provides NMI watchdog support in IOAPIC mode.
+* The route is: PIT -> PIC -> LVT0 in NMI mode,
+* timer IRQs will continue to flow through the IOAPIC.
+*/
+   for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+   vcpu = kvm->vcpus[i];
+   if (!vcpu)
+   continue;
+   kvm_apic_local_deliver(vcpu, APIC_LVT0);
+   }
 }
  


It would be better to gate this on a variable which is set only if this 
is actually necessary (e.g., does any vcpu have LVT0 set to NMI mode).
Otherwise, we touch all vcpus up to 1000 times a second.  As this is an 
optimization, it can be done later.



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] VT-d: Fix iommu map page for mmio pages

2008-09-28 Thread Avi Kivity


Muli Ben-Yehuda wrote:

On Sat, Sep 27, 2008 at 01:24:31PM +0300, Avi Kivity wrote:

  

I strongly disagree. You are advocating something that is
potentially unsafe---for the sake of code simplicity?! I am
advocating caution in what we let an *untrusted* guest do.
  

Why would it be unsafe?



Because on at least one machine letting a device DMA to the same
address as another device's MMIO region caused the machine to reboot.
  


That sounds like a hardware bug.  What does the vendor say about this?

--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Status of pci passthrough work?

2008-09-28 Thread Amit Shah

* On Sunday 28 Sep 2008 10:09:13 Avi Kivity wrote:
> Jan C. Bernauer wrote:
> > Hi,
> >
> >  I have about the same problem, so excuse me for hijacking this thread.
> >
> > My hardware consists of a 780g/SB700 Mainboard and a 4850e AMD CPU, and
> > I'm interested in forwarding a DVB-C tuner card to the guest. Maybe
> > some NICs later.
> >
> > I tried and 'sort of' got it working with Amit's kernel and userspace
> > tools.
> > First thing:
> > The dvb-c card has an interesting memory mapping, as reported by
> > lspci -v:
> > Memory at cfdff000 (32-bit, non-prefetchable) [size=512]
> >
> > Size 512 doesn't fly with a check in kvm_main.c:
> > if (mem->memory_size & (PAGE_SIZE - 1))
> > goto out;
> >
> > So I patched the userspace utilities to use 4096 instead.
> >
> > With that patch, the guest saw the card, the driver got loaded,
> > and channel tuning works, but I get some i2c timeouts on the
> > guest side, and the host side has errors like:
> >
> > [ cut here ]
> > Sep 22 02:28:54 [kernel] WARNING: at kernel/irq/manage.c:180
> > enable_irq+0x3a/0x55()
> > Sep 22 02:28:54 [kernel] Unbalanced enable for IRQ 20
>
> That looks due to bad error handling, due to the failures you had before.
>
> Try rebooting the host and starting again with the patched userspace.
>
> Amit, can you take a look at the error handling paths?

I suspect this is so because Jan's using very old trees. The 'master' branch 
used to hold the device assignment patches; they're now upstream.

The patches in the 'vtd' branch are also upstream.

The 'pvdma' branch is the one I should be updating. Currently, it's based on 
an older upstream kvm.git. I'll refresh it and drop a note.

Amit
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Status of pci passthrough work?

2008-09-28 Thread Amit Shah

* On Saturday 27 Sep 2008 13:27:46 Thomas Fjellstrom wrote:
> On Saturday 27 September 2008, Han, Weidong wrote:
> > Thomas Fjellstrom wrote:
> > > On Saturday 27 September 2008, Han, Weidong wrote:
> > >> Hi Thomas,
> > >>
> > >> the patches of passthrough/VT-d on kvm.git are already checked in.
> > >> With Amit's userspace patches, you can assign device to guest. You
> > >> can have a try.
> > >
> > > Does that mean I need VT-d support in hardware? All I have to test
> > > with right now is an AMD Phenom X4  with a 780g+sb700 system. Don't
> > > think it has an iommu, and I'd find it odd if the intel VT-d code
> > > "just worked" on amd's hardware.
> >
> > Yes, currently you need VT-d support in hardware to assign device.
>
> So I take it the PV-DMA (or pv-dma doesn't do what I think it does...) or
> the other 1:1 device pass through work isn't working right now?

pvdma does work, but the most recent patches aren't yet published (I should do 
that). It will work for simple devices.

1:1 will also work.

> It's something I'd really like to use, but I don't have access to a
> platform with a hardware iommu. Though I might be able to pick up a
> replacement board for my new server with the SB750 southbridge which
> supposedly has AMD's new iommu hardware in it, but I haven't seen any
> evidence that kvm or linux supports it.

Linux 2.6.27 onwards supports AMD IOMMU. kvm (and device assignment) support 
for AMD IOMMU doesn't exist yet, but work is planned to start soon.

> > >> Thomas Fjellstrom wrote:
> > >>> I'm very interested in being able to pass a few devices through to
> > >>> kvm guests. I'm wondering what exactly is working now, and how I
> > >>> can start testing it?
> > >>>
> > >>> the latest kvm release doesn't seem to include any support for it in
> > >>> userspace, so I can't test it with that...

The userspace patch is undergoing pre-merge revisions.

I'll send out an email once I get my git trees synced up to my working 
revisions.

In the meantime, you can use the patches from the list.
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

69 matches

Mail list logo