[kvm-devel] [PATCH 44/55] KVM: add kvm_is_error_hva()

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Check for the "error hva", an address outside the user address space that
signals a bad gfn.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |1 +
 drivers/kvm/kvm_main.c |   11 +++
 2 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3f5ffc3..6498324 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -520,6 +520,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 extern struct page *bad_page;
 
 int is_error_page(struct page *page);
+int kvm_is_error_hva(unsigned long addr);
 int kvm_set_memory_region(struct kvm *kvm,
  struct kvm_userspace_memory_region *mem,
  int user_alloc);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index dacdc66..e3c7f3b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -498,6 +498,17 @@ int is_error_page(struct page *page)
 }
 EXPORT_SYMBOL_GPL(is_error_page);
 
+static inline unsigned long bad_hva(void)
+{
+   return PAGE_OFFSET;
+}
+
+int kvm_is_error_hva(unsigned long addr)
+{
+   return addr == bad_hva();
+}
+EXPORT_SYMBOL_GPL(kvm_is_error_hva);
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
int i;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 55/55] KVM: Portability: Make kvm_vcpu_ioctl_translate arch dependent

2007-12-26 Thread Avi Kivity
From: Zhang Xiantao <[EMAIL PROTECTED]>

Move kvm_vcpu_ioctl_translate to the arch-specific code, since the mmu will be moved under arch.

Signed-off-by: Zhang Xiantao <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |3 +++
 drivers/kvm/kvm_main.c |   24 +---
 drivers/kvm/x86.c  |   22 ++
 3 files changed, 26 insertions(+), 23 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index a7be073..e34e246 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -642,6 +642,9 @@ void kvm_arch_destroy_vm(struct kvm *kvm);
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 int kvm_arch_vcpu_ioctl_set_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu);
 
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+   struct kvm_translation *tr);
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs);
 int kvm_arch_vcpu_ioctl_get_sregs(struct kvm_vcpu *vcpu,
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 9d63a10..bce4216 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -761,28 +761,6 @@ void kvm_resched(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_resched);
 
-/*
- * Translate a guest virtual address to a guest physical address.
- */
-static int kvm_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
-   struct kvm_translation *tr)
-{
-   unsigned long vaddr = tr->linear_address;
-   gpa_t gpa;
-
-   vcpu_load(vcpu);
-   mutex_lock(&vcpu->kvm->lock);
-   gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
-   tr->physical_address = gpa;
-   tr->valid = gpa != UNMAPPED_GVA;
-   tr->writeable = 1;
-   tr->usermode = 0;
-   mutex_unlock(&vcpu->kvm->lock);
-   vcpu_put(vcpu);
-
-   return 0;
-}
-
 static int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu,
struct kvm_interrupt *irq)
 {
@@ -986,7 +964,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
r = -EFAULT;
if (copy_from_user(&tr, argp, sizeof tr))
goto out;
-   r = kvm_vcpu_ioctl_translate(vcpu, &tr);
+   r = kvm_arch_vcpu_ioctl_translate(vcpu, &tr);
if (r)
goto out;
r = -EFAULT;
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 6097926..f1746af 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -2318,6 +2318,28 @@ struct fxsave {
 #endif
 };
 
+/*
+ * Translate a guest virtual address to a guest physical address.
+ */
+int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
+   struct kvm_translation *tr)
+{
+   unsigned long vaddr = tr->linear_address;
+   gpa_t gpa;
+
+   vcpu_load(vcpu);
+   mutex_lock(&vcpu->kvm->lock);
+   gpa = vcpu->mmu.gva_to_gpa(vcpu, vaddr);
+   tr->physical_address = gpa;
+   tr->valid = gpa != UNMAPPED_GVA;
+   tr->writeable = 1;
+   tr->usermode = 0;
+   mutex_unlock(&vcpu->kvm->lock);
+   vcpu_put(vcpu);
+
+   return 0;
+}
+
 int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
 {
struct fxsave *fxsave = (struct fxsave *)&vcpu->guest_fx_image;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 26/55] KVM: Portability: Move pio emulation functions to x86.c

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch moves implementation of the following functions from
kvm_main.c to x86.c:
free_pio_guest_pages, vcpu_find_pio_dev, pio_copy_data, complete_pio,
kernel_pio, pio_string_write, kvm_emulate_pio, kvm_emulate_pio_string

The function inject_gp, which was duplicated by yesterday's patch
series, is removed from kvm_main.c now because it is not needed anymore.

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |  248 
 drivers/kvm/x86.c  |  243 +++
 drivers/kvm/x86.h  |1 +
 3 files changed, 244 insertions(+), 248 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 2c5529c..27f3a6e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -271,17 +271,6 @@ static void kvm_free_physmem(struct kvm *kvm)
kvm_free_physmem_slot(&kvm->memslots[i], NULL);
 }
 
-static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
-{
-   int i;
-
-   for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
-   if (vcpu->pio.guest_pages[i]) {
-   kvm_release_page(vcpu->pio.guest_pages[i]);
-   vcpu->pio.guest_pages[i] = NULL;
-   }
-}
-
 static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu)
 {
vcpu_load(vcpu);
@@ -330,11 +319,6 @@ static int kvm_vm_release(struct inode *inode, struct file 
*filp)
return 0;
 }
 
-static void inject_gp(struct kvm_vcpu *vcpu)
-{
-   kvm_x86_ops->inject_gp(vcpu, 0);
-}
-
 void fx_init(struct kvm_vcpu *vcpu)
 {
unsigned after_mxcsr_mask;
@@ -827,12 +811,6 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
}
 }
 
-static struct kvm_io_device *vcpu_find_pio_dev(struct kvm_vcpu *vcpu,
-  gpa_t addr)
-{
-   return kvm_io_bus_find_dev(&vcpu->kvm->pio_bus, addr);
-}
-
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
@@ -1042,232 +1020,6 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_cpuid);
 
-static int pio_copy_data(struct kvm_vcpu *vcpu)
-{
-   void *p = vcpu->pio_data;
-   void *q;
-   unsigned bytes;
-   int nr_pages = vcpu->pio.guest_pages[1] ? 2 : 1;
-
-   q = vmap(vcpu->pio.guest_pages, nr_pages, VM_READ|VM_WRITE,
-PAGE_KERNEL);
-   if (!q) {
-   free_pio_guest_pages(vcpu);
-   return -ENOMEM;
-   }
-   q += vcpu->pio.guest_page_offset;
-   bytes = vcpu->pio.size * vcpu->pio.cur_count;
-   if (vcpu->pio.in)
-   memcpy(q, p, bytes);
-   else
-   memcpy(p, q, bytes);
-   q -= vcpu->pio.guest_page_offset;
-   vunmap(q);
-   free_pio_guest_pages(vcpu);
-   return 0;
-}
-
-static int complete_pio(struct kvm_vcpu *vcpu)
-{
-   struct kvm_pio_request *io = &vcpu->pio;
-   long delta;
-   int r;
-
-   kvm_x86_ops->cache_regs(vcpu);
-
-   if (!io->string) {
-   if (io->in)
-   memcpy(&vcpu->regs[VCPU_REGS_RAX], vcpu->pio_data,
-  io->size);
-   } else {
-   if (io->in) {
-   r = pio_copy_data(vcpu);
-   if (r) {
-   kvm_x86_ops->cache_regs(vcpu);
-   return r;
-   }
-   }
-
-   delta = 1;
-   if (io->rep) {
-   delta *= io->cur_count;
-   /*
-* The size of the register should really depend on
-* current address size.
-*/
-   vcpu->regs[VCPU_REGS_RCX] -= delta;
-   }
-   if (io->down)
-   delta = -delta;
-   delta *= io->size;
-   if (io->in)
-   vcpu->regs[VCPU_REGS_RDI] += delta;
-   else
-   vcpu->regs[VCPU_REGS_RSI] += delta;
-   }
-
-   kvm_x86_ops->decache_regs(vcpu);
-
-   io->count -= io->cur_count;
-   io->cur_count = 0;
-
-   return 0;
-}
-
-static void kernel_pio(struct kvm_io_device *pio_dev,
-  struct kvm_vcpu *vcpu,
-  void *pd)
-{
-   /* TODO: String I/O for in kernel device */
-
-   mutex_lock(&vcpu->kvm->lock);
-   if (vcpu->pio.in)
-   kvm_iodevice_read(pio_dev, vcpu->pio.port,
- vcpu->pio.size,
- pd);
-   else
-   

[kvm-devel] [PATCH 52/55] KVM: x86 emulator: modify 'lods', and 'stos' not to depend on CR2

2007-12-26 Thread Avi Kivity
From: Sheng Yang <[EMAIL PROTECTED]>

The current 'lods' and 'stos' implementations depend on the incoming CR2 rather
than decoding the memory address from the registers.

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   14 ++
 1 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index e697947..8e2162f 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1528,7 +1528,9 @@ special_insn:
case 0xaa ... 0xab: /* stos */
c->dst.type = OP_MEM;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-   c->dst.ptr = (unsigned long *)cr2;
+   c->dst.ptr = (unsigned long *)register_address(
+  ctxt->es_base,
+  c->regs[VCPU_REGS_RDI]);
c->dst.val = c->regs[VCPU_REGS_RAX];
register_address_increment(c->regs[VCPU_REGS_RDI],
   (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
@@ -1538,9 +1540,13 @@ special_insn:
c->dst.type = OP_REG;
c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
-   if ((rc = ops->read_emulated(cr2, &c->dst.val,
-c->dst.bytes,
-ctxt->vcpu)) != 0)
+   if ((rc = ops->read_emulated(register_address(
+   c->override_base ? *c->override_base :
+  ctxt->ds_base,
+c->regs[VCPU_REGS_RSI]),
+&c->dst.val,
+c->dst.bytes,
+ctxt->vcpu)) != 0)
goto done;
register_address_increment(c->regs[VCPU_REGS_RSI],
   (ctxt->eflags & EFLG_DF) ? -c->dst.bytes
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 20/55] KVM: Portability: move get/set_apic_base to x86.c

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch moves the implementation of get_apic_base and set_apic_base
from kvm_main.c to x86.c

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Reviewed-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   19 ---
 drivers/kvm/x86.c  |   19 +++
 2 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 00f9c9e..d6545a7 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -556,25 +556,6 @@ unsigned long get_cr8(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(get_cr8);
 
-u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
-{
-   if (irqchip_in_kernel(vcpu->kvm))
-   return vcpu->apic_base;
-   else
-   return vcpu->apic_base;
-}
-EXPORT_SYMBOL_GPL(kvm_get_apic_base);
-
-void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
-{
-   /* TODO: reserve bits check */
-   if (irqchip_in_kernel(vcpu->kvm))
-   kvm_lapic_set_base(vcpu, data);
-   else
-   vcpu->apic_base = data;
-}
-EXPORT_SYMBOL_GPL(kvm_set_apic_base);
-
 void fx_init(struct kvm_vcpu *vcpu)
 {
unsigned after_mxcsr_mask;
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index 5a95922..c26e371 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -59,6 +59,25 @@ unsigned long segment_base(u16 selector)
 }
 EXPORT_SYMBOL_GPL(segment_base);
 
+u64 kvm_get_apic_base(struct kvm_vcpu *vcpu)
+{
+   if (irqchip_in_kernel(vcpu->kvm))
+   return vcpu->apic_base;
+   else
+   return vcpu->apic_base;
+}
+EXPORT_SYMBOL_GPL(kvm_get_apic_base);
+
+void kvm_set_apic_base(struct kvm_vcpu *vcpu, u64 data)
+{
+   /* TODO: reserve bits check */
+   if (irqchip_in_kernel(vcpu->kvm))
+   kvm_lapic_set_base(vcpu, data);
+   else
+   vcpu->apic_base = data;
+}
+EXPORT_SYMBOL_GPL(kvm_set_apic_base);
+
 /*
  * List of msr numbers which we expose to userspace through KVM_GET_MSRS
  * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 36/55] KVM: Add make_page_dirty() to kvm_clear_guest_page()

2007-12-26 Thread Avi Kivity
From: Dor Laor <[EMAIL PROTECTED]>

Every write access to guest pages should be tracked.

Signed-off-by: Dor Laor <[EMAIL PROTECTED]>
Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index e12bdc1..302473d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -706,6 +706,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int 
offset, int len)
 
kunmap_atomic(page_virt, KM_USER0);
kvm_release_page(page);
+   mark_page_dirty(kvm, gfn);
return 0;
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest_page);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 21/55] KVM: Portability: Move control register helper functions to x86.c

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch moves the definitions of CR0_RESERVED_BITS,
CR4_RESERVED_BITS, and CR8_RESERVED_BITS along with the following
functions from kvm_main.c to x86.c:
set_cr0(), set_cr3(), set_cr4(), set_cr8(), get_cr8(), lmsw(),
load_pdptrs()
The static function wrapper inject_gp is duplicated in kvm_main.c and
x86.c for now; the version in kvm_main.c should disappear once its last
user is gone too.
The function load_pdptrs is no longer static, and now defined in x86.h
for the time being, until the last user of it is gone from kvm_main.c.

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Reviewed-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |  219 --
 drivers/kvm/x86.c  |  224 
 drivers/kvm/x86.h  |2 +-
 3 files changed, 225 insertions(+), 220 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index d6545a7..af4b470 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -90,17 +90,6 @@ static struct kvm_stats_debugfs_item {
 
 static struct dentry *debugfs_dir;
 
-#define CR0_RESERVED_BITS  \
-   (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
- | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
- | X86_CR0_NW | X86_CR0_CD | X86_CR0_PG))
-#define CR4_RESERVED_BITS  \
-   (~(unsigned long)(X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE\
- | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE \
- | X86_CR4_PGE | X86_CR4_PCE | X86_CR4_OSFXSR  \
- | X86_CR4_OSXMMEXCPT | X86_CR4_VMXE))
-
-#define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 #define EFER_RESERVED_BITS 0xf2fe
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
@@ -348,214 +337,6 @@ static void inject_gp(struct kvm_vcpu *vcpu)
kvm_x86_ops->inject_gp(vcpu, 0);
 }
 
-/*
- * Load the pae pdptrs.  Return true is they are all valid.
- */
-static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3)
-{
-   gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
-   unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
-   int i;
-   int ret;
-   u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
-
-   mutex_lock(&vcpu->kvm->lock);
-   ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
- offset * sizeof(u64), sizeof(pdpte));
-   if (ret < 0) {
-   ret = 0;
-   goto out;
-   }
-   for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
-   if ((pdpte[i] & 1) && (pdpte[i] & 0xfff001e6ull)) {
-   ret = 0;
-   goto out;
-   }
-   }
-   ret = 1;
-
-   memcpy(vcpu->pdptrs, pdpte, sizeof(vcpu->pdptrs));
-out:
-   mutex_unlock(&vcpu->kvm->lock);
-
-   return ret;
-}
-
-void set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
-{
-   if (cr0 & CR0_RESERVED_BITS) {
-   printk(KERN_DEBUG "set_cr0: 0x%lx #GP, reserved bits 0x%lx\n",
-  cr0, vcpu->cr0);
-   inject_gp(vcpu);
-   return;
-   }
-
-   if ((cr0 & X86_CR0_NW) && !(cr0 & X86_CR0_CD)) {
-   printk(KERN_DEBUG "set_cr0: #GP, CD == 0 && NW == 1\n");
-   inject_gp(vcpu);
-   return;
-   }
-
-   if ((cr0 & X86_CR0_PG) && !(cr0 & X86_CR0_PE)) {
-   printk(KERN_DEBUG "set_cr0: #GP, set PG flag "
-  "and a clear PE flag\n");
-   inject_gp(vcpu);
-   return;
-   }
-
-   if (!is_paging(vcpu) && (cr0 & X86_CR0_PG)) {
-#ifdef CONFIG_X86_64
-   if ((vcpu->shadow_efer & EFER_LME)) {
-   int cs_db, cs_l;
-
-   if (!is_pae(vcpu)) {
-   printk(KERN_DEBUG "set_cr0: #GP, start paging "
-  "in long mode while PAE is disabled\n");
-   inject_gp(vcpu);
-   return;
-   }
-   kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
-   if (cs_l) {
-   printk(KERN_DEBUG "set_cr0: #GP, start paging "
-  "in long mode while CS.L == 1\n");
-   inject_gp(vcpu);
-  

[kvm-devel] [PATCH 15/55] KVM: x86 emulator: don't depend on cr2 for mov abs emulation

2007-12-26 Thread Avi Kivity
The 'mov abs' instruction family (opcodes 0xa0 - 0xa3) still depends on cr2
provided by the page fault handler.  This is wrong for several reasons:

- if an instruction accessed misaligned data that crosses a page boundary,
  and if the fault happened on the second page, cr2 will point at the
  second page, not the data itself.

- if we're emulating in real mode, or due to a FlexPriority exit, there
  is no cr2 generated.

So, this change adds decoding for this instruction form and drops reliance
on cr2.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   50 +++--
 1 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 73e3580..087a820 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -63,8 +63,9 @@
 /* Destination is only written; never read. */
 #define Mov (1<<7)
 #define BitOp   (1<<8)
+#define MemAbs  (1<<9)  /* Memory operand is absolute displacement */
 
-static u8 opcode_table[256] = {
+static u16 opcode_table[256] = {
/* 0x00 - 0x07 */
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
@@ -134,8 +135,8 @@ static u8 opcode_table[256] = {
/* 0x90 - 0x9F */
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ImplicitOps, ImplicitOps, 0, 0,
/* 0xA0 - 0xA7 */
-   ByteOp | DstReg | SrcMem | Mov, DstReg | SrcMem | Mov,
-   ByteOp | DstMem | SrcReg | Mov, DstMem | SrcReg | Mov,
+   ByteOp | DstReg | SrcMem | Mov | MemAbs, DstReg | SrcMem | Mov | MemAbs,
+   ByteOp | DstMem | SrcReg | Mov | MemAbs, DstMem | SrcReg | Mov | MemAbs,
ByteOp | ImplicitOps | Mov, ImplicitOps | Mov,
ByteOp | ImplicitOps, ImplicitOps,
/* 0xA8 - 0xAF */
@@ -755,16 +756,6 @@ done_prefixes:
break;
}
}
-   if (!c->override_base)
-   c->override_base = &ctxt->ds_base;
-   if (mode == X86EMUL_MODE_PROT64 &&
-   c->override_base != &ctxt->fs_base &&
-   c->override_base != &ctxt->gs_base)
-   c->override_base = NULL;
-
-   if (c->override_base)
-   c->modrm_ea += *c->override_base;
-
if (rip_relative) {
c->modrm_ea += c->eip;
switch (c->d & SrcMask) {
@@ -781,12 +772,35 @@ done_prefixes:
c->modrm_ea += c->op_bytes;
}
}
-   if (c->ad_bytes != 8)
-   c->modrm_ea = (u32)c->modrm_ea;
 modrm_done:
;
+   } else if (c->d & MemAbs) {
+   switch (c->ad_bytes) {
+   case 2:
+   c->modrm_ea = insn_fetch(u16, 2, c->eip);
+   break;
+   case 4:
+   c->modrm_ea = insn_fetch(u32, 4, c->eip);
+   break;
+   case 8:
+   c->modrm_ea = insn_fetch(u64, 8, c->eip);
+   break;
+   }
+
}
 
+   if (!c->override_base)
+   c->override_base = &ctxt->ds_base;
+   if (mode == X86EMUL_MODE_PROT64 &&
+   c->override_base != &ctxt->fs_base &&
+   c->override_base != &ctxt->gs_base)
+   c->override_base = NULL;
+
+   if (c->override_base)
+   c->modrm_ea += *c->override_base;
+
+   if (c->ad_bytes != 8)
+   c->modrm_ea = (u32)c->modrm_ea;
/*
 * Decode and fetch the source operand: register, memory
 * or immediate.
@@ -1171,7 +1185,7 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
memcpy(c->regs, ctxt->vcpu->regs, sizeof c->regs);
saved_eip = c->eip;
 
-   if ((c->d & ModRM) && (c->modrm_mod != 3))
+   if (((c->d & ModRM) && (c->modrm_mod != 3)) || (c->d & MemAbs))
cr2 = c->modrm_ea;
 
if (c->src.type == OP_MEM) {
@@ -1326,13 +1340,9 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
c->dst.val = c->src.val;
-   /* skip src displacement */
-   c->eip += c->ad_bytes;
break;
case 0xa2 ... 0xa3: /* mov */
c->dst.val = (unsigned long)c->regs[VCPU_REGS_RA

[kvm-devel] [PATCH 18/55] KVM: Portability: Split kvm_vm_ioctl v3

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch splits kvm_vm_ioctl into architecture-independent parts, and
x86-specific parts which go to kvm_arch_vcpu_ioctl in x86.c.
The patch is unchanged since the last submission.

Common ioctls for all architectures are:
KVM_CREATE_VCPU, KVM_GET_DIRTY_LOG, KVM_SET_USER_MEMORY_REGION

x86 specific ioctls are:
KVM_SET_MEMORY_REGION,
KVM_GET/SET_NR_MMU_PAGES, KVM_SET_MEMORY_ALIAS, KVM_CREATE_IRQCHIP,
KVM_CREATE_IRQ_LINE, KVM_GET/SET_IRQCHIP
KVM_SET_TSS_ADDR

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Reviewed-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |7 ++
 drivers/kvm/kvm_main.c |  255 +--
 drivers/kvm/x86.c  |  258 
 3 files changed, 271 insertions(+), 249 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3d07d9b..516f79f 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -620,6 +620,13 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
 unsigned int ioctl, unsigned long arg);
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
 void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+  struct
+  kvm_userspace_memory_region *mem,
+  int user_alloc);
+long kvm_arch_vm_ioctl(struct file *filp,
+  unsigned int ioctl, unsigned long arg);
+void kvm_arch_destroy_vm(struct kvm *kvm);
 
 __init void kvm_arch_init(void);
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 26a6399..9c0175d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -792,36 +792,16 @@ out:
 }
 EXPORT_SYMBOL_GPL(kvm_set_memory_region);
 
-static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct
- kvm_userspace_memory_region *mem,
- int user_alloc)
+int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+  struct
+  kvm_userspace_memory_region *mem,
+  int user_alloc)
 {
if (mem->slot >= KVM_MEMORY_SLOTS)
return -EINVAL;
return kvm_set_memory_region(kvm, mem, user_alloc);
 }
 
-static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
- u32 kvm_nr_mmu_pages)
-{
-   if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
-   return -EINVAL;
-
-   mutex_lock(&kvm->lock);
-
-   kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
-   kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
-
-   mutex_unlock(&kvm->lock);
-   return 0;
-}
-
-static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
-{
-   return kvm->n_alloc_mmu_pages;
-}
-
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -867,111 +847,6 @@ out:
return r;
 }
 
-/*
- * Set a new alias region.  Aliases map a portion of physical memory into
- * another portion.  This is useful for memory windows, for example the PC
- * VGA region.
- */
-static int kvm_vm_ioctl_set_memory_alias(struct kvm *kvm,
-struct kvm_memory_alias *alias)
-{
-   int r, n;
-   struct kvm_mem_alias *p;
-
-   r = -EINVAL;
-   /* General sanity checks */
-   if (alias->memory_size & (PAGE_SIZE - 1))
-   goto out;
-   if (alias->guest_phys_addr & (PAGE_SIZE - 1))
-   goto out;
-   if (alias->slot >= KVM_ALIAS_SLOTS)
-   goto out;
-   if (alias->guest_phys_addr + alias->memory_size
-   < alias->guest_phys_addr)
-   goto out;
-   if (alias->target_phys_addr + alias->memory_size
-   < alias->target_phys_addr)
-   goto out;
-
-   mutex_lock(&kvm->lock);
-
-   p = &kvm->aliases[alias->slot];
-   p->base_gfn = alias->guest_phys_addr >> PAGE_SHIFT;
-   p->npages = alias->memory_size >> PAGE_SHIFT;
-   p->target_gfn = alias->target_phys_addr >> PAGE_SHIFT;
-
-   for (n = KVM_ALIAS_SLOTS; n > 0; --n)
-   if (kvm->aliases[n - 1].npages)
-   break;
-   kvm->naliases = n;
-
-   kvm_mmu_zap_all(kvm);
-
-   mutex_unlock(&kvm->lock);
-
-   return 0;
-
-out:
-   return r;
-}
-
-static int kvm_vm_ioctl_get_irqchip(struct kvm *kvm, struct kvm_irqchip *chip)
-{
-   int r;
-
-   r = 0;
-   switch (chip->chip_id) {
-   case KVM_IRQCHIP_PIC_MASTER:
-   memcpy(&c

[kvm-devel] [PATCH 42/55] KVM: x86 emulator: remove 8 bytes operands emulator for call near instruction

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

It is removed because it is not supported on a real host.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 64888a6..e697947 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1558,9 +1558,6 @@ special_insn:
case 4:
rel = insn_fetch(s32, 4, c->eip);
break;
-   case 8:
-   rel = insn_fetch(s64, 8, c->eip);
-   break;
default:
DPRINTF("Call: Invalid op_bytes\n");
goto cannot_emulate;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 39/55] KVM: Fix faults during injection of real-mode interrupts

2007-12-26 Thread Avi Kivity
If vmx fails to inject a real-mode interrupt while fetching the interrupt
redirection table, it fails to record this in the vectoring information
field.  So we detect this condition and do it ourselves.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   51 +--
 1 files changed, 49 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index bde2d07..63c230d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -65,7 +65,13 @@ struct vcpu_vmx {
int   fs_reload_needed;
int   guest_efer_loaded;
} host_state;
-
+   struct {
+   struct {
+   bool pending;
+   u8 vector;
+   unsigned rip;
+   } irq;
+   } rmode;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -1713,11 +1719,16 @@ out:
 
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
if (vcpu->rmode.active) {
+   vmx->rmode.irq.pending = true;
+   vmx->rmode.irq.vector = irq;
+   vmx->rmode.irq.rip = vmcs_readl(GUEST_RIP);
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
 irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
-   vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) - 1);
+   vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip - 1);
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
@@ -2251,6 +2262,17 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
return;
}
if (unlikely(idtv_info_field & INTR_INFO_VALID_MASK)) {
+   if ((idtv_info_field & VECTORING_INFO_TYPE_MASK)
+   == INTR_TYPE_EXT_INTR
+   && vcpu->rmode.active) {
+   u8 vect = idtv_info_field & VECTORING_INFO_VECTOR_MASK;
+
+   vmx_inject_irq(vcpu, vect);
+   if (unlikely(has_ext_irq))
+   enable_irq_window(vcpu);
+   return;
+   }
+
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
vmcs_read32(VM_EXIT_INSTRUCTION_LEN));
@@ -2275,6 +2297,29 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
enable_irq_window(vcpu);
 }
 
+/*
+ * Failure to inject an interrupt should give us the information
+ * in IDT_VECTORING_INFO_FIELD.  However, if the failure occurs
+ * when fetching the interrupt redirection bitmap in the real-mode
+ * tss, this doesn't happen.  So we do it ourselves.
+ */
+static void fixup_rmode_irq(struct vcpu_vmx *vmx)
+{
+   vmx->rmode.irq.pending = 0;
+   if (vmcs_readl(GUEST_RIP) + 1 != vmx->rmode.irq.rip)
+   return;
+   vmcs_writel(GUEST_RIP, vmx->rmode.irq.rip);
+   if (vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK) {
+   vmx->idt_vectoring_info &= ~VECTORING_INFO_TYPE_MASK;
+   vmx->idt_vectoring_info |= INTR_TYPE_EXT_INTR;
+   return;
+   }
+   vmx->idt_vectoring_info =
+   VECTORING_INFO_VALID_MASK
+   | INTR_TYPE_EXT_INTR
+   | vmx->rmode.irq.vector;
+}
+
 static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -2401,6 +2446,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
  );
 
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   if (vmx->rmode.irq.pending)
+   fixup_rmode_irq(vmx);
 
vcpu->interrupt_window_open =
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 40/55] KVM: VMX: Comment VMX primary/secondary exec ctl definitions

2007-12-26 Thread Avi Kivity
From: Eddie Dong <[EMAIL PROTECTED]>

Add comments for secondary/primary Processor-Based VM-execution controls.

Signed-off-by: Yaozu (Eddie) Dong <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.h |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index d757b36..6d32bc6 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -25,6 +25,9 @@
  *
  */
 
+/*
+ * Definitions of Primary Processor-Based VM-Execution Controls.
+ */
 #define CPU_BASED_VIRTUAL_INTR_PENDING  0x0004
 #define CPU_BASED_USE_TSC_OFFSETING 0x0008
 #define CPU_BASED_HLT_EXITING   0x0080
@@ -42,6 +45,11 @@
 #define CPU_BASED_MONITOR_EXITING   0x2000
 #define CPU_BASED_PAUSE_EXITING 0x4000
 #define CPU_BASED_ACTIVATE_SECONDARY_CONTROLS   0x8000
+/*
+ * Definitions of Secondary Processor-Based VM-Execution Controls.
+ */
+#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x0001
+
 
 #define PIN_BASED_EXT_INTR_MASK 0x0001
 #define PIN_BASED_NMI_EXITING   0x0008
@@ -54,8 +62,6 @@
#define VM_ENTRY_SMM                            0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
 
-#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
-
 /* VMCS Encodings */
 enum vmcs_field {
	GUEST_ES_SELECTOR   = 0x00000800,
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 22/55] KVM: VMX: Enable memory mapped TPR shadow (FlexPriority)

2007-12-26 Thread Avi Kivity
From: Sheng Yang <[EMAIL PROTECTED]>

This patch based on CR8/TPR patch, and enable the TPR shadow (FlexPriority)
for 32bit Windows.  Since TPR is accessed very frequently by 32bit
Windows, especially SMP guest, with FlexPriority enabled, we saw significant
performance gain.

Signed-off-by: Sheng Yang <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |4 ++
 drivers/kvm/kvm_main.c |   56 ---
 drivers/kvm/vmx.c  |  117 ++-
 drivers/kvm/vmx.h  |5 ++
 4 files changed, 152 insertions(+), 30 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 516f79f..22317d6 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -383,6 +383,7 @@ struct kvm {
struct kvm_ioapic *vioapic;
int round_robin_prev_vcpu;
unsigned int tss_addr;
+   struct page *apic_access_page;
 };
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
@@ -522,6 +523,9 @@ int is_error_page(struct page *page);
 int kvm_set_memory_region(struct kvm *kvm,
  struct kvm_userspace_memory_region *mem,
  int user_alloc);
+int __kvm_set_memory_region(struct kvm *kvm,
+   struct kvm_userspace_memory_region *mem,
+   int user_alloc);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page(struct page *page);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index af4b470..c3dc24f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -362,10 +362,12 @@ EXPORT_SYMBOL_GPL(fx_init);
  * space.
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
+ *
+ * Must be called holding kvm->lock.
  */
-int kvm_set_memory_region(struct kvm *kvm,
- struct kvm_userspace_memory_region *mem,
- int user_alloc)
+int __kvm_set_memory_region(struct kvm *kvm,
+   struct kvm_userspace_memory_region *mem,
+   int user_alloc)
 {
int r;
gfn_t base_gfn;
@@ -392,8 +394,6 @@ int kvm_set_memory_region(struct kvm *kvm,
if (!npages)
mem->flags &= ~KVM_MEM_LOG_DIRTY_PAGES;
 
-   mutex_lock(&kvm->lock);
-
new = old = *memslot;
 
new.base_gfn = base_gfn;
@@ -403,7 +403,7 @@ int kvm_set_memory_region(struct kvm *kvm,
/* Disallow changing a memory slot's size. */
r = -EINVAL;
if (npages && old.npages && npages != old.npages)
-   goto out_unlock;
+   goto out_free;
 
/* Check for overlaps */
r = -EEXIST;
@@ -414,7 +414,7 @@ int kvm_set_memory_region(struct kvm *kvm,
continue;
if (!((base_gfn + npages <= s->base_gfn) ||
  (base_gfn >= s->base_gfn + s->npages)))
-   goto out_unlock;
+   goto out_free;
}
 
/* Free page dirty bitmap if unneeded */
@@ -428,7 +428,7 @@ int kvm_set_memory_region(struct kvm *kvm,
new.rmap = vmalloc(npages * sizeof(struct page *));
 
if (!new.rmap)
-   goto out_unlock;
+   goto out_free;
 
memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
@@ -445,7 +445,7 @@ int kvm_set_memory_region(struct kvm *kvm,
up_write(&current->mm->mmap_sem);
 
if (IS_ERR((void *)new.userspace_addr))
-   goto out_unlock;
+   goto out_free;
}
} else {
if (!old.user_alloc && old.rmap) {
@@ -468,7 +468,7 @@ int kvm_set_memory_region(struct kvm *kvm,
 
new.dirty_bitmap = vmalloc(dirty_bytes);
if (!new.dirty_bitmap)
-   goto out_unlock;
+   goto out_free;
memset(new.dirty_bitmap, 0, dirty_bytes);
}
 
@@ -498,18 +498,28 @@ int kvm_set_memory_region(struct kvm *kvm,
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
kvm_flush_remote_tlbs(kvm);
 
-   mutex_unlock(&kvm->lock);
-
kvm_free_physmem_slot(&old, &new);
return 0;
 
-out_unlock:
-   mutex_unlock(&kvm->lock);
+out_free:
kvm_free_physmem_slot(&new, &old);
 out:
return r;
 
 }
+EXPORT_SYMBOL_GPL(__kvm_set_memory_region);
+
+int kvm_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+   int r;
+
+   mutex_lock(&kvm->lock);
+   r = __kvm_set_memory_region(kvm, mem, user_alloc);
+   mutex_unlock(&kvm->lock);
+   return r;
+

[kvm-devel] [PATCH 45/55] KVM: introduce gfn_to_hva()

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Convert a guest frame number to the corresponding host virtual address.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   25 +
 1 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index e3c7f3b..d318c28 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -559,28 +559,37 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
+static unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+{
+   struct kvm_memory_slot *slot;
+
+   gfn = unalias_gfn(kvm, gfn);
+   slot = __gfn_to_memslot(kvm, gfn);
+   if (!slot)
+   return bad_hva();
+   return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+}
+
 /*
  * Requires current->mm->mmap_sem to be held
  */
 static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
-   struct kvm_memory_slot *slot;
struct page *page[1];
+   unsigned long addr;
int npages;
 
might_sleep();
 
-   gfn = unalias_gfn(kvm, gfn);
-   slot = __gfn_to_memslot(kvm, gfn);
-   if (!slot) {
+   addr = gfn_to_hva(kvm, gfn);
+   if (kvm_is_error_hva(addr)) {
get_page(bad_page);
return bad_page;
}
 
-   npages = get_user_pages(current, current->mm,
-   slot->userspace_addr
-   + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
-   1, 1, page, NULL);
+   npages = get_user_pages(current, current->mm, addr, 1, 1, 1, page,
+   NULL);
+
if (npages != 1) {
get_page(bad_page);
return bad_page;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 41/55] KVM: VMX: wbinvd exiting

2007-12-26 Thread Avi Kivity
From: Eddie Dong <[EMAIL PROTECTED]>

Add wbinvd VM Exit support to prepare for pass-through
device cache emulation and also enhance real time
responsiveness.

Signed-off-by: Yaozu (Eddie) Dong <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   11 ++-
 drivers/kvm/vmx.h |2 ++
 2 files changed, 12 insertions(+), 1 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 63c230d..ad16865 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -980,7 +980,8 @@ static __init int setup_vmcs_config(struct vmcs_config 
*vmcs_conf)
 #endif
if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
min = 0;
-   opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+   opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+   SECONDARY_EXEC_WBINVD_EXITING;
if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
&_cpu_based_2nd_exec_control) < 0)
return -EIO;
@@ -2133,6 +2134,13 @@ static int handle_vmcall(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
return 1;
 }
 
+static int handle_wbinvd(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+{
+   skip_emulated_instruction(vcpu);
+   /* TODO: Add support for VT-d/pass-through device */
+   return 1;
+}
+
 static int handle_apic_access(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
u64 exit_qualification;
@@ -2174,6 +2182,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu 
*vcpu,
[EXIT_REASON_VMCALL]  = handle_vmcall,
[EXIT_REASON_TPR_BELOW_THRESHOLD] = handle_tpr_below_threshold,
[EXIT_REASON_APIC_ACCESS] = handle_apic_access,
+   [EXIT_REASON_WBINVD]  = handle_wbinvd,
 };
 
 static const int kvm_vmx_max_exit_handlers =
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index 6d32bc6..d52ae8d 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -49,6 +49,7 @@
  * Definitions of Secondary Processor-Based VM-Execution Controls.
  */
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
+#define SECONDARY_EXEC_WBINVD_EXITING  0x00000040
 
 
#define PIN_BASED_EXT_INTR_MASK 0x00000001
@@ -223,6 +224,7 @@ enum vmcs_field {
 #define EXIT_REASON_MWAIT_INSTRUCTION   36
 #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
 #define EXIT_REASON_APIC_ACCESS 44
+#define EXIT_REASON_WBINVD 54
 
 /*
  * Interruption-information format
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 31/55] KVM: x86 emulator: Hoist modrm and abs decoding into separate functions

2007-12-26 Thread Avi Kivity
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |  337 -
 1 files changed, 177 insertions(+), 160 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index b352a6c..64888a6 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -548,14 +548,182 @@ static void decode_register_operand(struct operand *op,
op->orig_val = op->val;
 }
 
+static int decode_modrm(struct x86_emulate_ctxt *ctxt,
+   struct x86_emulate_ops *ops)
+{
+   struct decode_cache *c = &ctxt->decode;
+   u8 sib;
+   int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+   int rc = 0;
+
+   if (c->rex_prefix) {
+   c->modrm_reg = (c->rex_prefix & 4) << 1;/* REX.R */
+   index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
+   c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */
+   }
+
+   c->modrm = insn_fetch(u8, 1, c->eip);
+   c->modrm_mod |= (c->modrm & 0xc0) >> 6;
+   c->modrm_reg |= (c->modrm & 0x38) >> 3;
+   c->modrm_rm |= (c->modrm & 0x07);
+   c->modrm_ea = 0;
+   c->use_modrm_ea = 1;
+
+   if (c->modrm_mod == 3) {
+   c->modrm_val = *(unsigned long *)
+   decode_register(c->modrm_rm, c->regs, c->d & ByteOp);
+   return rc;
+   }
+
+   if (c->ad_bytes == 2) {
+   unsigned bx = c->regs[VCPU_REGS_RBX];
+   unsigned bp = c->regs[VCPU_REGS_RBP];
+   unsigned si = c->regs[VCPU_REGS_RSI];
+   unsigned di = c->regs[VCPU_REGS_RDI];
+
+   /* 16-bit ModR/M decode. */
+   switch (c->modrm_mod) {
+   case 0:
+   if (c->modrm_rm == 6)
+   c->modrm_ea += insn_fetch(u16, 2, c->eip);
+   break;
+   case 1:
+   c->modrm_ea += insn_fetch(s8, 1, c->eip);
+   break;
+   case 2:
+   c->modrm_ea += insn_fetch(u16, 2, c->eip);
+   break;
+   }
+   switch (c->modrm_rm) {
+   case 0:
+   c->modrm_ea += bx + si;
+   break;
+   case 1:
+   c->modrm_ea += bx + di;
+   break;
+   case 2:
+   c->modrm_ea += bp + si;
+   break;
+   case 3:
+   c->modrm_ea += bp + di;
+   break;
+   case 4:
+   c->modrm_ea += si;
+   break;
+   case 5:
+   c->modrm_ea += di;
+   break;
+   case 6:
+   if (c->modrm_mod != 0)
+   c->modrm_ea += bp;
+   break;
+   case 7:
+   c->modrm_ea += bx;
+   break;
+   }
+   if (c->modrm_rm == 2 || c->modrm_rm == 3 ||
+   (c->modrm_rm == 6 && c->modrm_mod != 0))
+   if (!c->override_base)
+   c->override_base = &ctxt->ss_base;
+   c->modrm_ea = (u16)c->modrm_ea;
+   } else {
+   /* 32/64-bit ModR/M decode. */
+   switch (c->modrm_rm) {
+   case 4:
+   case 12:
+   sib = insn_fetch(u8, 1, c->eip);
+   index_reg |= (sib >> 3) & 7;
+   base_reg |= sib & 7;
+   scale = sib >> 6;
+
+   switch (base_reg) {
+   case 5:
+   if (c->modrm_mod != 0)
+   c->modrm_ea += c->regs[base_reg];
+   else
+   c->modrm_ea +=
+   insn_fetch(s32, 4, c->eip);
+   break;
+   default:
+   c->modrm_ea += c->regs[base_reg];
+   }
+   switch (index_reg) {
+   case 4:
+   break;
+   default:
+   c->modrm_ea += c->regs[index_reg] << scale;
+   }
+   break;
+   case 5:
+   if (c->modrm_mod != 0)
+  

[kvm-devel] [PATCH 43/55] KVM: Simplify CPU_TASKS_FROZEN cpu notifier handling

2007-12-26 Thread Avi Kivity
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 302473d..dacdc66 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1335,21 +1335,19 @@ static int kvm_cpu_hotplug(struct notifier_block 
*notifier, unsigned long val,
 {
int cpu = (long)v;
 
+   val &= ~CPU_TASKS_FROZEN;
switch (val) {
case CPU_DYING:
-   case CPU_DYING_FROZEN:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
   cpu);
hardware_disable(NULL);
break;
case CPU_UP_CANCELED:
-   case CPU_UP_CANCELED_FROZEN:
printk(KERN_INFO "kvm: disabling virtualization on CPU%d\n",
   cpu);
smp_call_function_single(cpu, hardware_disable, NULL, 0, 1);
break;
case CPU_ONLINE:
-   case CPU_ONLINE_FROZEN:
printk(KERN_INFO "kvm: enabling virtualization on CPU%d\n",
   cpu);
smp_call_function_single(cpu, hardware_enable, NULL, 0, 1);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 33/55] KVM: Portability: Move x86 instruction emulation code to x86.c

2007-12-26 Thread Avi Kivity
From: Hollis Blanchard <[EMAIL PROTECTED]>

Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |1 +
 drivers/kvm/kvm_main.c |  177 +---
 drivers/kvm/x86.c  |  175 +++
 3 files changed, 177 insertions(+), 176 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index d030a82..ef2a6a8 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -591,6 +591,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 
data);
 
 void fx_init(struct kvm_vcpu *vcpu);
 
+void kvm_vcpu_block(struct kvm_vcpu *vcpu);
 void kvm_resched(struct kvm_vcpu *vcpu);
 void kvm_load_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 11580be..a779d42 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -789,7 +789,7 @@ void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 /*
  * The vCPU has executed a HLT instruction with in-kernel mode enabled.
  */
-static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
+void kvm_vcpu_block(struct kvm_vcpu *vcpu)
 {
DECLARE_WAITQUEUE(wait, current);
 
@@ -812,144 +812,6 @@ static void kvm_vcpu_block(struct kvm_vcpu *vcpu)
remove_wait_queue(&vcpu->wq, &wait);
 }
 
-int kvm_emulate_halt(struct kvm_vcpu *vcpu)
-{
-   ++vcpu->stat.halt_exits;
-   if (irqchip_in_kernel(vcpu->kvm)) {
-   vcpu->mp_state = VCPU_MP_STATE_HALTED;
-   kvm_vcpu_block(vcpu);
-   if (vcpu->mp_state != VCPU_MP_STATE_RUNNABLE)
-   return -EINTR;
-   return 1;
-   } else {
-   vcpu->run->exit_reason = KVM_EXIT_HLT;
-   return 0;
-   }
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_halt);
-
-int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
-{
-   unsigned long nr, a0, a1, a2, a3, ret;
-
-   kvm_x86_ops->cache_regs(vcpu);
-
-   nr = vcpu->regs[VCPU_REGS_RAX];
-   a0 = vcpu->regs[VCPU_REGS_RBX];
-   a1 = vcpu->regs[VCPU_REGS_RCX];
-   a2 = vcpu->regs[VCPU_REGS_RDX];
-   a3 = vcpu->regs[VCPU_REGS_RSI];
-
-   if (!is_long_mode(vcpu)) {
-   nr &= 0xFFFFFFFF;
-   a0 &= 0xFFFFFFFF;
-   a1 &= 0xFFFFFFFF;
-   a2 &= 0xFFFFFFFF;
-   a3 &= 0xFFFFFFFF;
-   }
-
-   switch (nr) {
-   default:
-   ret = -KVM_ENOSYS;
-   break;
-   }
-   vcpu->regs[VCPU_REGS_RAX] = ret;
-   kvm_x86_ops->decache_regs(vcpu);
-   return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_emulate_hypercall);
-
-int kvm_fix_hypercall(struct kvm_vcpu *vcpu)
-{
-   char instruction[3];
-   int ret = 0;
-
-   mutex_lock(&vcpu->kvm->lock);
-
-   /*
-* Blow out the MMU to ensure that no other VCPU has an active mapping
-* to ensure that the updated hypercall appears atomically across all
-* VCPUs.
-*/
-   kvm_mmu_zap_all(vcpu->kvm);
-
-   kvm_x86_ops->cache_regs(vcpu);
-   kvm_x86_ops->patch_hypercall(vcpu, instruction);
-   if (emulator_write_emulated(vcpu->rip, instruction, 3, vcpu)
-   != X86EMUL_CONTINUE)
-   ret = -EFAULT;
-
-   mutex_unlock(&vcpu->kvm->lock);
-
-   return ret;
-}
-
-static u64 mk_cr_64(u64 curr_cr, u32 new_val)
-{
-   return (curr_cr & ~((1ULL << 32) - 1)) | new_val;
-}
-
-void realmode_lgdt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-   struct descriptor_table dt = { limit, base };
-
-   kvm_x86_ops->set_gdt(vcpu, &dt);
-}
-
-void realmode_lidt(struct kvm_vcpu *vcpu, u16 limit, unsigned long base)
-{
-   struct descriptor_table dt = { limit, base };
-
-   kvm_x86_ops->set_idt(vcpu, &dt);
-}
-
-void realmode_lmsw(struct kvm_vcpu *vcpu, unsigned long msw,
-  unsigned long *rflags)
-{
-   lmsw(vcpu, msw);
-   *rflags = kvm_x86_ops->get_rflags(vcpu);
-}
-
-unsigned long realmode_get_cr(struct kvm_vcpu *vcpu, int cr)
-{
-   kvm_x86_ops->decache_cr4_guest_bits(vcpu);
-   switch (cr) {
-   case 0:
-   return vcpu->cr0;
-   case 2:
-   return vcpu->cr2;
-   case 3:
-   return vcpu->cr3;
-   case 4:
-   return vcpu->cr4;
-   default:
-   vcpu_printf(vcpu, "%s: unexpected cr %u\n", __FUNCTION__, cr);
-   return 0;
-   }
-}
-
-void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, unsigned long val,
-unsigned long *rflags)
-{
-   switch (cr) {
-   case 0:
-   set_cr0(vcpu, mk_cr_64(vcpu->cr0, val));
-   *rflags = kvm_x86_ops->get_rflags(vcpu);
-  

[kvm-devel] [PATCH 24/55] KVM: Portability: Move kvm_get/set_msr[_common] to x86.c

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch moves the implementation of the functions of kvm_get/set_msr,
kvm_get/set_msr_common, and set_efer from kvm_main.c to x86.c. The
definition of EFER_RESERVED_BITS is moved too.

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |  133 ---
 drivers/kvm/x86.c  |  134 
 2 files changed, 134 insertions(+), 133 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 7186791..5eccf27 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -90,8 +90,6 @@ static struct kvm_stats_debugfs_item {
 
 static struct dentry *debugfs_dir;
 
-#define EFER_RESERVED_BITS 0xfffffffffffff2fe
-
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
   unsigned long arg);
 
@@ -1356,137 +1354,6 @@ void realmode_set_cr(struct kvm_vcpu *vcpu, int cr, 
unsigned long val,
}
 }
 
-int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
-{
-   u64 data;
-
-   switch (msr) {
-   case 0xc0010010: /* SYSCFG */
-   case 0xc0010015: /* HWCR */
-   case MSR_IA32_PLATFORM_ID:
-   case MSR_IA32_P5_MC_ADDR:
-   case MSR_IA32_P5_MC_TYPE:
-   case MSR_IA32_MC0_CTL:
-   case MSR_IA32_MCG_STATUS:
-   case MSR_IA32_MCG_CAP:
-   case MSR_IA32_MC0_MISC:
-   case MSR_IA32_MC0_MISC+4:
-   case MSR_IA32_MC0_MISC+8:
-   case MSR_IA32_MC0_MISC+12:
-   case MSR_IA32_MC0_MISC+16:
-   case MSR_IA32_UCODE_REV:
-   case MSR_IA32_PERF_STATUS:
-   case MSR_IA32_EBL_CR_POWERON:
-   /* MTRR registers */
-   case 0xfe:
-   case 0x200 ... 0x2ff:
-   data = 0;
-   break;
-   case 0xcd: /* fsb frequency */
-   data = 3;
-   break;
-   case MSR_IA32_APICBASE:
-   data = kvm_get_apic_base(vcpu);
-   break;
-   case MSR_IA32_MISC_ENABLE:
-   data = vcpu->ia32_misc_enable_msr;
-   break;
-#ifdef CONFIG_X86_64
-   case MSR_EFER:
-   data = vcpu->shadow_efer;
-   break;
-#endif
-   default:
-   pr_unimpl(vcpu, "unhandled rdmsr: 0x%x\n", msr);
-   return 1;
-   }
-   *pdata = data;
-   return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_get_msr_common);
-
-/*
- * Reads an msr value (of 'msr_index') into 'pdata'.
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-int kvm_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
-{
-   return kvm_x86_ops->get_msr(vcpu, msr_index, pdata);
-}
-
-#ifdef CONFIG_X86_64
-
-static void set_efer(struct kvm_vcpu *vcpu, u64 efer)
-{
-   if (efer & EFER_RESERVED_BITS) {
-   printk(KERN_DEBUG "set_efer: 0x%llx #GP, reserved bits\n",
-  efer);
-   inject_gp(vcpu);
-   return;
-   }
-
-   if (is_paging(vcpu)
-   && (vcpu->shadow_efer & EFER_LME) != (efer & EFER_LME)) {
-   printk(KERN_DEBUG "set_efer: #GP, change LME while paging\n");
-   inject_gp(vcpu);
-   return;
-   }
-
-   kvm_x86_ops->set_efer(vcpu, efer);
-
-   efer &= ~EFER_LMA;
-   efer |= vcpu->shadow_efer & EFER_LMA;
-
-   vcpu->shadow_efer = efer;
-}
-
-#endif
-
-int kvm_set_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 data)
-{
-   switch (msr) {
-#ifdef CONFIG_X86_64
-   case MSR_EFER:
-   set_efer(vcpu, data);
-   break;
-#endif
-   case MSR_IA32_MC0_STATUS:
-   pr_unimpl(vcpu, "%s: MSR_IA32_MC0_STATUS 0x%llx, nop\n",
-  __FUNCTION__, data);
-   break;
-   case MSR_IA32_MCG_STATUS:
-   pr_unimpl(vcpu, "%s: MSR_IA32_MCG_STATUS 0x%llx, nop\n",
-   __FUNCTION__, data);
-   break;
-   case MSR_IA32_UCODE_REV:
-   case MSR_IA32_UCODE_WRITE:
-   case 0x200 ... 0x2ff: /* MTRRs */
-   break;
-   case MSR_IA32_APICBASE:
-   kvm_set_apic_base(vcpu, data);
-   break;
-   case MSR_IA32_MISC_ENABLE:
-   vcpu->ia32_misc_enable_msr = data;
-   break;
-   default:
-   pr_unimpl(vcpu, "unhandled wrmsr: 0x%x\n", msr);
-   return 1;
-   }
-   return 0;
-}
-EXPORT_SYMBOL_GPL(kvm_set_msr_common);
-
-/*
- * Writes msr value into into the appropriate "register".
- * Returns 0 on success, non-0 otherwise.
- * Assumes vcpu_load() was already called.
- */
-int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
-{
- 

[kvm-devel] [PATCH 37/55] KVM: VMX: Use vmx to inject real-mode interrupts

2007-12-26 Thread Avi Kivity
Instead of injecting real-mode interrupts by writing the interrupt frame into
guest memory, abuse vmx by injecting a software interrupt.  We need to
pretend the software interrupt instruction had a length > 0, so we have to
adjust rip backward.

This lets us not to mess with writing guest memory, which is complex and also
sleeps.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   53 -
 drivers/kvm/vmx.h |1 +
 2 files changed, 5 insertions(+), 49 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 6fc981d..1166132 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1709,58 +1709,13 @@ out:
return ret;
 }
 
-static void inject_rmode_irq(struct kvm_vcpu *vcpu, int irq)
-{
-   u16 ent[2];
-   u16 cs;
-   u16 ip;
-   unsigned long flags;
-   unsigned long ss_base = vmcs_readl(GUEST_SS_BASE);
-   u16 sp =  vmcs_readl(GUEST_RSP);
-   u32 ss_limit = vmcs_read32(GUEST_SS_LIMIT);
-
-   if (sp > ss_limit || sp < 6) {
-   vcpu_printf(vcpu, "%s: #SS, rsp 0x%lx ss 0x%lx limit 0x%x\n",
-   __FUNCTION__,
-   vmcs_readl(GUEST_RSP),
-   vmcs_readl(GUEST_SS_BASE),
-   vmcs_read32(GUEST_SS_LIMIT));
-   return;
-   }
-
-   if (emulator_read_std(irq * sizeof(ent), &ent, sizeof(ent), vcpu) !=
-   X86EMUL_CONTINUE) {
-   vcpu_printf(vcpu, "%s: read guest err\n", __FUNCTION__);
-   return;
-   }
-
-   flags =  vmcs_readl(GUEST_RFLAGS);
-   cs =  vmcs_readl(GUEST_CS_BASE) >> 4;
-   ip =  vmcs_readl(GUEST_RIP);
-
-
-   if (emulator_write_emulated(
-   ss_base + sp - 2, &flags, 2, vcpu) != X86EMUL_CONTINUE ||
-   emulator_write_emulated(
-   ss_base + sp - 4, &cs, 2, vcpu) != X86EMUL_CONTINUE ||
-   emulator_write_emulated(
-   ss_base + sp - 6, &ip, 2, vcpu) != X86EMUL_CONTINUE) {
-   vcpu_printf(vcpu, "%s: write guest err\n", __FUNCTION__);
-   return;
-   }
-
-   vmcs_writel(GUEST_RFLAGS, flags &
-   ~(X86_EFLAGS_IF | X86_EFLAGS_AC | X86_EFLAGS_TF));
-   vmcs_write16(GUEST_CS_SELECTOR, ent[1]) ;
-   vmcs_writel(GUEST_CS_BASE, ent[1] << 4);
-   vmcs_writel(GUEST_RIP, ent[0]);
-   vmcs_writel(GUEST_RSP, (vmcs_readl(GUEST_RSP) & ~0xffff) | (sp - 6));
-}
-
 static void vmx_inject_irq(struct kvm_vcpu *vcpu, int irq)
 {
if (vcpu->rmode.active) {
-   inject_rmode_irq(vcpu, irq);
+   vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
+irq | INTR_TYPE_SOFT_INTR | INTR_INFO_VALID_MASK);
+   vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, 1);
+   vmcs_writel(GUEST_RIP, vmcs_readl(GUEST_RIP) - 1);
return;
}
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
diff --git a/drivers/kvm/vmx.h b/drivers/kvm/vmx.h
index c84bd37..d757b36 100644
--- a/drivers/kvm/vmx.h
+++ b/drivers/kvm/vmx.h
@@ -233,6 +233,7 @@ enum vmcs_field {
 
 #define INTR_TYPE_EXT_INTR  (0 << 8) /* external interrupt */
 #define INTR_TYPE_EXCEPTION (3 << 8) /* processor exception */
+#define INTR_TYPE_SOFT_INTR (4 << 8) /* software interrupt */
 
 /*
  * Exit Qualifications for MOV for Control Register Access
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 32/55] KVM: Portability: Make exported debugfs data architecture-specific

2007-12-26 Thread Avi Kivity
From: Hollis Blanchard <[EMAIL PROTECTED]>

Signed-off-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |7 +++
 drivers/kvm/kvm_main.c |   25 -
 drivers/kvm/x86.c  |   22 ++
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 22317d6..d030a82 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -761,4 +761,11 @@ static inline u32 get_rdx_init_val(void)
 #define TSS_REDIRECTION_SIZE (256 / 8)
 #define RMODE_TSS_SIZE (TSS_BASE_SIZE + TSS_REDIRECTION_SIZE + TSS_IOPB_SIZE + 
1)
 
+struct kvm_stats_debugfs_item {
+   const char *name;
+   int offset;
+   struct dentry *dentry;
+};
+extern struct kvm_stats_debugfs_item debugfs_entries[];
+
 #endif
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ad4c84c..11580be 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -63,31 +63,6 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_cache);
 
 static __read_mostly struct preempt_ops kvm_preempt_ops;
 
-#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
-
-static struct kvm_stats_debugfs_item {
-   const char *name;
-   int offset;
-   struct dentry *dentry;
-} debugfs_entries[] = {
-   { "pf_fixed", STAT_OFFSET(pf_fixed) },
-   { "pf_guest", STAT_OFFSET(pf_guest) },
-   { "tlb_flush", STAT_OFFSET(tlb_flush) },
-   { "invlpg", STAT_OFFSET(invlpg) },
-   { "exits", STAT_OFFSET(exits) },
-   { "io_exits", STAT_OFFSET(io_exits) },
-   { "mmio_exits", STAT_OFFSET(mmio_exits) },
-   { "signal_exits", STAT_OFFSET(signal_exits) },
-   { "irq_window", STAT_OFFSET(irq_window_exits) },
-   { "halt_exits", STAT_OFFSET(halt_exits) },
-   { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
-   { "request_irq", STAT_OFFSET(request_irq_exits) },
-   { "irq_exits", STAT_OFFSET(irq_exits) },
-   { "light_exits", STAT_OFFSET(light_exits) },
-   { "efer_reload", STAT_OFFSET(efer_reload) },
-   { NULL }
-};
-
 static struct dentry *debugfs_dir;
 
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index f75e7d7..c1f10e5 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -40,6 +40,28 @@
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
#define EFER_RESERVED_BITS 0xfffffffffffff2fe
 
+#define STAT_OFFSET(x) offsetof(struct kvm_vcpu, stat.x)
+
+struct kvm_stats_debugfs_item debugfs_entries[] = {
+   { "pf_fixed", STAT_OFFSET(pf_fixed) },
+   { "pf_guest", STAT_OFFSET(pf_guest) },
+   { "tlb_flush", STAT_OFFSET(tlb_flush) },
+   { "invlpg", STAT_OFFSET(invlpg) },
+   { "exits", STAT_OFFSET(exits) },
+   { "io_exits", STAT_OFFSET(io_exits) },
+   { "mmio_exits", STAT_OFFSET(mmio_exits) },
+   { "signal_exits", STAT_OFFSET(signal_exits) },
+   { "irq_window", STAT_OFFSET(irq_window_exits) },
+   { "halt_exits", STAT_OFFSET(halt_exits) },
+   { "halt_wakeup", STAT_OFFSET(halt_wakeup) },
+   { "request_irq", STAT_OFFSET(request_irq_exits) },
+   { "irq_exits", STAT_OFFSET(irq_exits) },
+   { "light_exits", STAT_OFFSET(light_exits) },
+   { "efer_reload", STAT_OFFSET(efer_reload) },
+   { NULL }
+};
+
+
 unsigned long segment_base(u16 selector)
 {
struct descriptor_table gdt;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 28/55] KVM: x86 emulator: centralize decoding of one-byte register access insns

2007-12-26 Thread Avi Kivity
Instructions like 'inc reg' that have the register operand encoded
in the opcode are currently specially decoded.  Extend
decode_register_operand() to handle that case, indicated by having
DstReg or SrcReg without ModRM.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |  103 -
 drivers/kvm/x86_emulate.h |1 +
 2 files changed, 47 insertions(+), 57 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 58ceb66..884e4a2 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -99,17 +99,13 @@ static u16 opcode_table[256] = {
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
/* 0x40 - 0x47 */
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x48 - 0x4F */
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x50 - 0x57 */
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg, SrcReg,
/* 0x58 - 0x5F */
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
-   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg, DstReg,
/* 0x60 - 0x67 */
0, 0, 0, DstReg | SrcMem32 | ModRM | Mov /* movsxd (x86/64) */ ,
0, 0, 0, 0,
@@ -525,13 +521,17 @@ static void decode_register_operand(struct operand *op,
int highbyte_regs,
int inhibit_bytereg)
 {
+   unsigned reg = c->modrm_reg;
+
+   if (!(c->d & ModRM))
+   reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
op->type = OP_REG;
if ((c->d & ByteOp) && !inhibit_bytereg) {
-   op->ptr = decode_register(c->modrm_reg, c->regs, highbyte_regs);
+   op->ptr = decode_register(reg, c->regs, highbyte_regs);
op->val = *(u8 *)op->ptr;
op->bytes = 1;
} else {
-   op->ptr = decode_register(c->modrm_reg, c->regs, 0);
+   op->ptr = decode_register(reg, c->regs, 0);
op->bytes = c->op_bytes;
switch (op->bytes) {
case 2:
@@ -552,7 +552,7 @@ int
 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
struct decode_cache *c = &ctxt->decode;
-   u8 sib, rex_prefix = 0;
+   u8 sib;
int rc = 0;
int mode = ctxt->mode;
int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
@@ -616,7 +616,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
case 0x40 ... 0x4f: /* REX */
if (mode != X86EMUL_MODE_PROT64)
goto done_prefixes;
-   rex_prefix = c->b;
+   c->rex_prefix = c->b;
continue;
case 0xf0:  /* LOCK */
c->lock_prefix = 1;
@@ -631,18 +631,18 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
 
/* Any legacy prefix after a REX prefix nullifies its effect. */
 
-   rex_prefix = 0;
+   c->rex_prefix = 0;
}
 
 done_prefixes:
 
/* REX prefix. */
-   if (rex_prefix) {
-   if (rex_prefix & 8)
+   if (c->rex_prefix) {
+   if (c->rex_prefix & 8)
c->op_bytes = 8;/* REX.W */
-   c->modrm_reg = (rex_prefix & 4) << 1;   /* REX.R */
-   index_reg = (rex_prefix & 2) << 2; /* REX.X */
-   c->modrm_rm = base_reg = (rex_prefix & 1) << 3; /* REG.B */
+   c->modrm_reg = (c->rex_prefix & 4) << 1;/* REX.R */
+   index_reg = (c->rex_prefix & 2) << 2; /* REX.X */
+   c->modrm_rm = base_reg = (c->rex_prefix & 1) << 3; /* REG.B */
}
 
/* Opcode byte(s). */
@@ -837,7 +837,7 @@ modrm_done:
case SrcNone:
break;
case SrcReg:
-   decode_register_operand(&c->src, c, rex_prefix == 0, 0);
+   decode_register_operand(&c->src, c, c->rex_prefix == 0, 0);
break;
case SrcMem16:
c->src.bytes = 2;
@@ -895,7 +895,7 @@ modrm_done:
 

[kvm-devel] [PATCH 38/55] KVM: VMX: Read & store IDT_VECTORING_INFO_FIELD

2007-12-26 Thread Avi Kivity
We'll want to write to it in order to fix real-mode irq injection problems,
but it is a read-only field.  Storing it in a variable solves that issue.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   17 -
 1 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 1166132..bde2d07 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -48,6 +48,7 @@ struct vcpu_vmx {
struct kvm_vcpu   vcpu;
int   launched;
u8fail;
+   u32   idt_vectoring_info;
struct kvm_msr_entry *guest_msrs;
struct kvm_msr_entry *host_msrs;
int   nmsrs;
@@ -863,9 +864,10 @@ static int set_guest_debug(struct kvm_vcpu *vcpu, struct 
kvm_debug_guest *dbg)
 
 static int vmx_get_irq(struct kvm_vcpu *vcpu)
 {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 idtv_info_field;
 
-   idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   idtv_info_field = vmx->idt_vectoring_info;
if (idtv_info_field & INTR_INFO_VALID_MASK) {
if (is_external_interrupt(idtv_info_field))
return idtv_info_field & VECTORING_INFO_VECTOR_MASK;
@@ -1817,12 +1819,13 @@ static int handle_rmode_exception(struct kvm_vcpu *vcpu,
 
 static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
 {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info, error_code;
unsigned long cr2, rip;
u32 vect_info;
enum emulation_result er;
 
-   vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   vect_info = vmx->idt_vectoring_info;
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
 
if ((vect_info & VECTORING_INFO_VALID_MASK) &&
@@ -2171,9 +2174,9 @@ static const int kvm_vmx_max_exit_handlers =
  */
 static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
 {
-   u32 vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
u32 exit_reason = vmcs_read32(VM_EXIT_REASON);
struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 vectoring_info = vmx->idt_vectoring_info;
 
if (unlikely(vmx->fail)) {
kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
@@ -2228,6 +2231,7 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)
 
 static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 {
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 idtv_info_field, intr_info_field;
int has_ext_irq, interrupt_window_open;
int vector;
@@ -2236,7 +2240,7 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
 
has_ext_irq = kvm_cpu_has_interrupt(vcpu);
intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD);
-   idtv_info_field = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   idtv_info_field = vmx->idt_vectoring_info;
if (intr_info_field & INTR_INFO_VALID_MASK) {
if (idtv_info_field & INTR_INFO_VALID_MASK) {
/* TODO: fault when IDT_Vectoring */
@@ -2396,6 +2400,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 #endif
  );
 
+   vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+
vcpu->interrupt_window_open =
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
 
@@ -2413,7 +2419,8 @@ static void vmx_inject_page_fault(struct kvm_vcpu *vcpu,
  unsigned long addr,
  u32 err_code)
 {
-   u32 vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   u32 vect_info = vmx->idt_vectoring_info;
 
++vcpu->stat.pf_guest;
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 30/55] KVM: Make mark_page_dirty() work for aliased pages too.

2007-12-26 Thread Avi Kivity
From: Uri Lublin <[EMAIL PROTECTED]>

Recommended by Izik Eidus.

Signed-off-by: Uri Lublin <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 27f3a6e..ad4c84c 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -796,11 +796,11 @@ int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned 
long len)
 }
 EXPORT_SYMBOL_GPL(kvm_clear_guest);
 
-/* WARNING: Does not work on aliased pages. */
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_memory_slot *memslot;
 
+   gfn = unalias_gfn(kvm, gfn);
memslot = __gfn_to_memslot(kvm, gfn);
if (memslot && memslot->dirty_bitmap) {
unsigned long rel_gfn = gfn - memslot->base_gfn;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 17/55] KVM: MMU: Topup the mmu memory preallocation caches before emulating an insn

2007-12-26 Thread Avi Kivity
Emulation may cause a shadow pte to be instantiated, which requires
memory resources.  Make sure the caches are filled to avoid an oops.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ace3cb8..9be54a5 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1362,6 +1362,10 @@ int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, 
u32 error_code)
goto out;
}
 
+   r = mmu_topup_memory_caches(vcpu);
+   if (r)
+   goto out;
+
er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
mutex_unlock(&vcpu->kvm->lock);
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 19/55] KVM: Portability: Move memory segmentation to x86.c

2007-12-26 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch moves the definition of segment_descriptor_64 for AMD64 and
EM64T from kvm_main.c to segment_descriptor.h. It also adds a proper
#ifndef...#define...#endif around that header file.
The implementation of segment_base is moved from kvm_main.c to x86.c.

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Reviewed-by: Christian Borntraeger <[EMAIL PROTECTED]>
Acked-by: Hollis Blanchard <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c   |   42 --
 drivers/kvm/segment_descriptor.h |   12 ++
 drivers/kvm/x86.c|   33 +
 3 files changed, 45 insertions(+), 42 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 9c0175d..00f9c9e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -18,7 +18,6 @@
 #include "kvm.h"
 #include "x86.h"
 #include "x86_emulate.h"
-#include "segment_descriptor.h"
 #include "irq.h"
 
 #include 
@@ -104,50 +103,9 @@ static struct dentry *debugfs_dir;
 #define CR8_RESERVED_BITS (~(unsigned long)X86_CR8_TPR)
 #define EFER_RESERVED_BITS 0xf2fe
 
-#ifdef CONFIG_X86_64
-/* LDT or TSS descriptor in the GDT. 16 bytes. */
-struct segment_descriptor_64 {
-   struct segment_descriptor s;
-   u32 base_higher;
-   u32 pad_zero;
-};
-
-#endif
-
 static long kvm_vcpu_ioctl(struct file *file, unsigned int ioctl,
   unsigned long arg);
 
-unsigned long segment_base(u16 selector)
-{
-   struct descriptor_table gdt;
-   struct segment_descriptor *d;
-   unsigned long table_base;
-   unsigned long v;
-
-   if (selector == 0)
-   return 0;
-
-   asm("sgdt %0" : "=m"(gdt));
-   table_base = gdt.base;
-
-   if (selector & 4) {   /* from ldt */
-   u16 ldt_selector;
-
-   asm("sldt %0" : "=g"(ldt_selector));
-   table_base = segment_base(ldt_selector);
-   }
-   d = (struct segment_descriptor *)(table_base + (selector & ~7));
-   v = d->base_low | ((unsigned long)d->base_mid << 16) |
-   ((unsigned long)d->base_high << 24);
-#ifdef CONFIG_X86_64
-   if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
-   v |= ((unsigned long) \
- ((struct segment_descriptor_64 *)d)->base_higher) << 32;
-#endif
-   return v;
-}
-EXPORT_SYMBOL_GPL(segment_base);
-
 static inline int valid_vcpu(int n)
 {
return likely(n >= 0 && n < KVM_MAX_VCPUS);
diff --git a/drivers/kvm/segment_descriptor.h b/drivers/kvm/segment_descriptor.h
index 71fdf45..56fc4c8 100644
--- a/drivers/kvm/segment_descriptor.h
+++ b/drivers/kvm/segment_descriptor.h
@@ -1,3 +1,6 @@
+#ifndef __SEGMENT_DESCRIPTOR_H
+#define __SEGMENT_DESCRIPTOR_H
+
 struct segment_descriptor {
u16 limit_low;
u16 base_low;
@@ -14,4 +17,13 @@ struct segment_descriptor {
u8  base_high;
 } __attribute__((packed));
 
+#ifdef CONFIG_X86_64
+/* LDT or TSS descriptor in the GDT. 16 bytes. */
+struct segment_descriptor_64 {
+   struct segment_descriptor s;
+   u32 base_higher;
+   u32 pad_zero;
+};
 
+#endif
+#endif
diff --git a/drivers/kvm/x86.c b/drivers/kvm/x86.c
index b84cb67..5a95922 100644
--- a/drivers/kvm/x86.c
+++ b/drivers/kvm/x86.c
@@ -16,16 +16,49 @@
 
 #include "kvm.h"
 #include "x86.h"
+#include "segment_descriptor.h"
 #include "irq.h"
 
 #include 
 #include 
 #include 
+#include 
 
 #include 
 
 #define MAX_IO_MSRS 256
 
+unsigned long segment_base(u16 selector)
+{
+   struct descriptor_table gdt;
+   struct segment_descriptor *d;
+   unsigned long table_base;
+   unsigned long v;
+
+   if (selector == 0)
+   return 0;
+
+   asm("sgdt %0" : "=m"(gdt));
+   table_base = gdt.base;
+
+   if (selector & 4) {   /* from ldt */
+   u16 ldt_selector;
+
+   asm("sldt %0" : "=g"(ldt_selector));
+   table_base = segment_base(ldt_selector);
+   }
+   d = (struct segment_descriptor *)(table_base + (selector & ~7));
+   v = d->base_low | ((unsigned long)d->base_mid << 16) |
+   ((unsigned long)d->base_high << 24);
+#ifdef CONFIG_X86_64
+   if (d->system == 0 && (d->type == 2 || d->type == 9 || d->type == 11))
+   v |= ((unsigned long) \
+ ((struct segment_descriptor_64 *)d)->base_higher) << 32;
+#endif
+   return v;
+}
+EXPORT_SYMBOL_GPL(segment_base);
+
 /*
  * List of msr numbers which we ex

[kvm-devel] [PATCH 29/55] KVM: Simplify decode_register_operand() calling convention

2007-12-26 Thread Avi Kivity
Now that rex_prefix is part of the decode cache, there is no need to pass
it along.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 884e4a2..b352a6c 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -518,10 +518,10 @@ static int test_cc(unsigned int condition, unsigned int 
flags)
 
 static void decode_register_operand(struct operand *op,
struct decode_cache *c,
-   int highbyte_regs,
int inhibit_bytereg)
 {
unsigned reg = c->modrm_reg;
+   int highbyte_regs = c->rex_prefix == 0;
 
if (!(c->d & ModRM))
reg = (c->b & 7) | ((c->rex_prefix & 1) << 3);
@@ -837,7 +837,7 @@ modrm_done:
case SrcNone:
break;
case SrcReg:
-   decode_register_operand(&c->src, c, c->rex_prefix == 0, 0);
+   decode_register_operand(&c->src, c, 0);
break;
case SrcMem16:
c->src.bytes = 2;
@@ -895,7 +895,7 @@ modrm_done:
/* Special instructions do their own operand decoding. */
return 0;
case DstReg:
-   decode_register_operand(&c->dst, c, c->rex_prefix == 0,
+   decode_register_operand(&c->dst, c,
 c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
break;
case DstMem:
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 23/55] KVM: Fix gfn_to_page() acquiring mmap_sem twice

2007-12-26 Thread Avi Kivity
From: Anthony Liguori <[EMAIL PROTECTED]>

KVM's nopage handler calls gfn_to_page() which acquires the mmap_sem when
calling out to get_user_pages().  nopage handlers are already invoked with the
mmap_sem held though.  Introduce a __gfn_to_page() for use by the nopage
handler which requires the lock to already be held.

This was noticed by tglx.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   22 ++
 1 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c3dc24f..7186791 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -633,7 +633,10 @@ int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
 
-struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+/*
+ * Requires current->mm->mmap_sem to be held
+ */
+static struct page *__gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_memory_slot *slot;
struct page *page[1];
@@ -648,12 +651,10 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
return bad_page;
}
 
-   down_read(¤t->mm->mmap_sem);
npages = get_user_pages(current, current->mm,
slot->userspace_addr
+ (gfn - slot->base_gfn) * PAGE_SIZE, 1,
1, 1, page, NULL);
-   up_read(¤t->mm->mmap_sem);
if (npages != 1) {
get_page(bad_page);
return bad_page;
@@ -661,6 +662,18 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
return page[0];
 }
+
+struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
+{
+   struct page *page;
+
+   down_read(¤t->mm->mmap_sem);
+   page = __gfn_to_page(kvm, gfn);
+   up_read(¤t->mm->mmap_sem);
+
+   return page;
+}
+
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
 void kvm_release_page(struct page *page)
@@ -2621,7 +2634,8 @@ static struct page *kvm_vm_nopage(struct vm_area_struct 
*vma,
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
if (!kvm_is_visible_gfn(kvm, pgoff))
return NOPAGE_SIGBUS;
-   page = gfn_to_page(kvm, pgoff);
+   /* current->mm->mmap_sem is already held so call lockless version */
+   page = __gfn_to_page(kvm, pgoff);
if (is_error_page(page)) {
kvm_release_page(page);
return NOPAGE_SIGBUS;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 27/55] KVM: x86 emulator: Extract the common code of SrcReg and DstReg

2007-12-26 Thread Avi Kivity
Share the common parts of SrcReg and DstReg decoding.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   80 +---
 1 files changed, 31 insertions(+), 49 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 087a820..58ceb66 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -520,6 +520,34 @@ static int test_cc(unsigned int condition, unsigned int 
flags)
return (!!rc ^ (condition & 1));
 }
 
+static void decode_register_operand(struct operand *op,
+   struct decode_cache *c,
+   int highbyte_regs,
+   int inhibit_bytereg)
+{
+   op->type = OP_REG;
+   if ((c->d & ByteOp) && !inhibit_bytereg) {
+   op->ptr = decode_register(c->modrm_reg, c->regs, highbyte_regs);
+   op->val = *(u8 *)op->ptr;
+   op->bytes = 1;
+   } else {
+   op->ptr = decode_register(c->modrm_reg, c->regs, 0);
+   op->bytes = c->op_bytes;
+   switch (op->bytes) {
+   case 2:
+   op->val = *(u16 *)op->ptr;
+   break;
+   case 4:
+   op->val = *(u32 *)op->ptr;
+   break;
+   case 8:
+   op->val = *(u64 *) op->ptr;
+   break;
+   }
+   }
+   op->orig_val = op->val;
+}
+
 int
 x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
@@ -809,31 +837,7 @@ modrm_done:
case SrcNone:
break;
case SrcReg:
-   c->src.type = OP_REG;
-   if (c->d & ByteOp) {
-   c->src.ptr =
-   decode_register(c->modrm_reg, c->regs,
- (rex_prefix == 0));
-   c->src.val = c->src.orig_val = *(u8 *)c->src.ptr;
-   c->src.bytes = 1;
-   } else {
-   c->src.ptr =
-   decode_register(c->modrm_reg, c->regs, 0);
-   switch ((c->src.bytes = c->op_bytes)) {
-   case 2:
-   c->src.val = c->src.orig_val =
-  *(u16 *) c->src.ptr;
-   break;
-   case 4:
-   c->src.val = c->src.orig_val =
-  *(u32 *) c->src.ptr;
-   break;
-   case 8:
-   c->src.val = c->src.orig_val =
-  *(u64 *) c->src.ptr;
-   break;
-   }
-   }
+   decode_register_operand(&c->src, c, rex_prefix == 0, 0);
break;
case SrcMem16:
c->src.bytes = 2;
@@ -891,30 +895,8 @@ modrm_done:
/* Special instructions do their own operand decoding. */
return 0;
case DstReg:
-   c->dst.type = OP_REG;
-   if ((c->d & ByteOp)
-   && !(c->twobyte &&
-   (c->b == 0xb6 || c->b == 0xb7))) {
-   c->dst.ptr =
-   decode_register(c->modrm_reg, c->regs,
- (rex_prefix == 0));
-   c->dst.val = *(u8 *) c->dst.ptr;
-   c->dst.bytes = 1;
-   } else {
-   c->dst.ptr =
-   decode_register(c->modrm_reg, c->regs, 0);
-   switch ((c->dst.bytes = c->op_bytes)) {
-   case 2:
-   c->dst.val = *(u16 *)c->dst.ptr;
-   break;
-   case 4:
-   c->dst.val = *(u32 *)c->dst.ptr;
-   break;
-   case 8:
-   c->dst.val = *(u64 *)c->dst.ptr;
-   break;
-   }
-   }
+   decode_register_operand(&c->dst, c, rex_prefix == 0,
+c->twobyte && (c->b == 0xb6 || c->b == 0xb7));
break;
case DstMem:
if ((c->d & ModRM) && c->modrm_mod == 3) {
-- 
1.5.3.7



[kvm-devel] [PATCH 16/55] KVM: Move page fault processing to common code

2007-12-26 Thread Avi Kivity
The code that dispatches the page fault and emulates if we failed to map
is duplicated across vmx and svm.  Merge it to simplify further bugfixing.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |   36 
 drivers/kvm/svm.c |   33 +
 drivers/kvm/vmx.c |   29 +
 drivers/kvm/x86.h |6 +-
 4 files changed, 39 insertions(+), 65 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index d9c5950..ace3cb8 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1347,6 +1347,42 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
}
 }
 
+int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gva_t cr2, u32 error_code)
+{
+   int r;
+   enum emulation_result er;
+
+   mutex_lock(&vcpu->kvm->lock);
+   r = vcpu->mmu.page_fault(vcpu, cr2, error_code);
+   if (r < 0)
+   goto out;
+
+   if (!r) {
+   r = 1;
+   goto out;
+   }
+
+   er = emulate_instruction(vcpu, vcpu->run, cr2, error_code, 0);
+   mutex_unlock(&vcpu->kvm->lock);
+
+   switch (er) {
+   case EMULATE_DONE:
+   return 1;
+   case EMULATE_DO_MMIO:
+   ++vcpu->stat.mmio_exits;
+   return 0;
+   case EMULATE_FAIL:
+   kvm_report_emulation_failure(vcpu, "pagetable");
+   return 1;
+   default:
+   BUG();
+   }
+out:
+   mutex_unlock(&vcpu->kvm->lock);
+   return r;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_page_fault);
+
 static void free_mmu_pages(struct kvm_vcpu *vcpu)
 {
struct kvm_mmu_page *page;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index cdd4fd2..991e77d 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -933,45 +933,14 @@ static int pf_interception(struct vcpu_svm *svm, struct 
kvm_run *kvm_run)
struct kvm *kvm = svm->vcpu.kvm;
u64 fault_address;
u32 error_code;
-   enum emulation_result er;
-   int r;
 
if (!irqchip_in_kernel(kvm) &&
is_external_interrupt(exit_int_info))
push_irq(&svm->vcpu, exit_int_info & SVM_EVTINJ_VEC_MASK);
 
-   mutex_lock(&kvm->lock);
-
fault_address  = svm->vmcb->control.exit_info_2;
error_code = svm->vmcb->control.exit_info_1;
-   r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
-   if (r < 0) {
-   mutex_unlock(&kvm->lock);
-   return r;
-   }
-   if (!r) {
-   mutex_unlock(&kvm->lock);
-   return 1;
-   }
-   er = emulate_instruction(&svm->vcpu, kvm_run, fault_address,
-error_code, 0);
-   mutex_unlock(&kvm->lock);
-
-   switch (er) {
-   case EMULATE_DONE:
-   return 1;
-   case EMULATE_DO_MMIO:
-   ++svm->vcpu.stat.mmio_exits;
-   return 0;
-   case EMULATE_FAIL:
-   kvm_report_emulation_failure(&svm->vcpu, "pagetable");
-   break;
-   default:
-   BUG();
-   }
-
-   kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
-   return 0;
+   return kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code);
 }
 
 static int ud_interception(struct vcpu_svm *svm, struct kvm_run *kvm_run)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index f76677d..265df86 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1796,7 +1796,6 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
unsigned long cr2, rip;
u32 vect_info;
enum emulation_result er;
-   int r;
 
vect_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
@@ -1834,33 +1833,7 @@ static int handle_exception(struct kvm_vcpu *vcpu, 
struct kvm_run *kvm_run)
error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
if (is_page_fault(intr_info)) {
cr2 = vmcs_readl(EXIT_QUALIFICATION);
-
-   mutex_lock(&vcpu->kvm->lock);
-   r = kvm_mmu_page_fault(vcpu, cr2, error_code);
-   if (r < 0) {
-   mutex_unlock(&vcpu->kvm->lock);
-   return r;
-   }
-   if (!r) {
-   mutex_unlock(&vcpu->kvm->lock);
-   return 1;
-   }
-
-   er = emulate_instruction(vcpu, kvm_run, cr2, error_code, 0);
-   mutex_unlock(&vcpu->kvm->lock);
-
-   switch (er) {
-   case EMULATE_DONE:
-   return 1;
-   case EMULATE_DO_MMIO:
-   ++vcpu->stat.mmio_exits;
-   

[kvm-devel] [PATCH 12/55] KVM: VMX: Let gcc to choose which registers to save (i386)

2007-12-26 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

This patch lets GCC determine which registers to save when we
switch to/from a VCPU in the case of intel i386.

* Original code saves following registers:

eax, ebx, ecx, edx, edi, esi, ebp (using popa)

* Patched code:

  - informs GCC that we modify following registers
using the clobber description:

ebx, edi, esi

  - doesn't save eax because it is an output operand (vmx->fail)

  - cannot put ecx in clobber description because it is an input operand,
but as we modify it and we want to keep its value (vcpu), we must
save it (pop/push)

  - ebp is saved (pop/push) because GCC seems to ignore its use in the clobber
description.

  - edx is saved (pop/push) because it is reserved by GCC (REGPARM) and
cannot be put in the clobber description.

  - line "mov (%%esp), %3 \n\t" has been removed because %3
is ecx and ecx is restored just after.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |8 +---
 1 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 3a58a2a..f76677d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -2268,7 +2268,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
"push %%rdx; push %%rbp;"
"push %%rcx \n\t"
 #else
-   "pusha; push %%ecx \n\t"
+   "push %%edx; push %%ebp;"
+   "push %%ecx \n\t"
 #endif
ASM_VMX_VMWRITE_RSP_RDX "\n\t"
/* Check if vmlaunch of vmresume is needed */
@@ -2342,9 +2343,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
"mov %%ebp, %c[rbp](%3) \n\t"
"mov %%cr2, %%eax  \n\t"
"mov %%eax, %c[cr2](%3) \n\t"
-   "mov (%%esp), %3 \n\t"
 
-   "pop %%ecx; popa \n\t"
+   "pop %%ecx; pop %%ebp; pop %%edx \n\t"
 #endif
"setbe %0 \n\t"
  : "=q" (vmx->fail)
@@ -2372,6 +2372,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 #ifdef CONFIG_X86_64
, "rbx", "rdi", "rsi"
, "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#else
+   , "ebx", "edi", "rsi"
 #endif
  );
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 10/55] KVM: Add ioctl to set tss address from userspace

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Currently kvm has a wart in that it requires three extra pages for use
as a tss when emulating real mode on Intel.  This patch moves the allocation
internally, only requiring userspace to tell us where in the physical address
space we can place the tss.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |3 +++
 drivers/kvm/kvm_main.c |   16 
 drivers/kvm/svm.c  |7 +++
 drivers/kvm/vmx.c  |   27 +--
 include/linux/kvm.h|2 ++
 5 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 82c3b13..3d07d9b 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -382,6 +382,7 @@ struct kvm {
struct kvm_pic *vpic;
struct kvm_ioapic *vioapic;
int round_robin_prev_vcpu;
+   unsigned int tss_addr;
 };
 
 static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
@@ -467,6 +468,8 @@ struct kvm_x86_ops {
void (*inject_pending_irq)(struct kvm_vcpu *vcpu);
void (*inject_pending_vectors)(struct kvm_vcpu *vcpu,
   struct kvm_run *run);
+
+   int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
 };
 
 extern struct kvm_x86_ops *kvm_x86_ops;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index bc8334b..26a6399 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2661,6 +2661,16 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu)
return fd;
 }
 
+static int kvm_vm_ioctl_set_tss_addr(struct kvm *kvm, unsigned long addr)
+{
+   int ret;
+
+   if (addr > (unsigned int)(-3 * PAGE_SIZE))
+   return -1;
+   ret = kvm_x86_ops->set_tss_addr(kvm, addr);
+   return ret;
+}
+
 /*
  * Creates some virtual cpus.  Good luck creating more than one.
  */
@@ -2957,6 +2967,11 @@ static long kvm_vm_ioctl(struct file *filp,
int r = -EINVAL;
 
switch (ioctl) {
+   case KVM_SET_TSS_ADDR:
+   r = kvm_vm_ioctl_set_tss_addr(kvm, arg);
+   if (r < 0)
+   goto out;
+   break;
case KVM_CREATE_VCPU:
r = kvm_vm_ioctl_create_vcpu(kvm, arg);
if (r < 0)
@@ -3183,6 +3198,7 @@ static long kvm_dev_ioctl(struct file *filp,
case KVM_CAP_HLT:
case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
case KVM_CAP_USER_MEMORY:
+   case KVM_CAP_SET_TSS_ADDR:
r = 1;
break;
default:
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 4a70168..56d1ad6 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1439,6 +1439,11 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
control->intercept &= ~(1ULL << INTERCEPT_VINTR);
 }
 
+static int svm_set_tss_addr(struct kvm *kvm, unsigned int addr)
+{
+   return 0;
+}
+
 static void save_db_regs(unsigned long *db_regs)
 {
asm volatile ("mov %%dr0, %0" : "=r"(db_regs[0]));
@@ -1744,6 +1749,8 @@ static struct kvm_x86_ops svm_x86_ops = {
.set_irq = svm_set_irq,
.inject_pending_irq = svm_intr_assist,
.inject_pending_vectors = do_interrupt_requests,
+
+   .set_tss_addr = svm_set_tss_addr,
 };
 
 static int __init svm_init(void)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 77083e4..50c151e 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1105,8 +1105,12 @@ static void enter_pmode(struct kvm_vcpu *vcpu)
 
 static gva_t rmode_tss_base(struct kvm *kvm)
 {
-   gfn_t base_gfn = kvm->memslots[0].base_gfn + kvm->memslots[0].npages - 
3;
-   return base_gfn << PAGE_SHIFT;
+   if (!kvm->tss_addr) {
+   gfn_t base_gfn = kvm->memslots[0].base_gfn +
+kvm->memslots[0].npages - 3;
+   return base_gfn << PAGE_SHIFT;
+   }
+   return kvm->tss_addr;
 }
 
 static void fix_rmode_seg(int seg, struct kvm_save_segment *save)
@@ -1735,6 +1739,23 @@ static void do_interrupt_requests(struct kvm_vcpu *vcpu,
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control);
 }
 
+static int vmx_set_tss_addr(struct kvm *kvm, unsigned int addr)
+{
+   int ret;
+   struct kvm_userspace_memory_region tss_mem = {
+   .slot = 8,
+   .guest_phys_addr = addr,
+   .memory_size = PAGE_SIZE * 3,
+   .flags = 0,
+   };
+
+   ret = kvm_set_memory_region(kvm, &tss_mem, 0);
+   if (ret)
+   return ret;
+   kvm->tss_addr = addr;
+   return 0;
+}
+
 static void kvm_guest_debug_pre(struct kvm_vcpu *vcpu)
 {
struct kvm_guest_debug *dbg = &vcpu->guest_debug;
@@ -2543,6 +2564,8 @@ static struct k

[kvm-devel] [PATCH 03/55] KVM: Add a might_sleep() annotation to gfn_to_page()

2007-12-26 Thread Avi Kivity
This will help trap accesses to guest memory in atomic context.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ff77175..005f2d5 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -993,6 +993,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
struct page *page[1];
int npages;
 
+   might_sleep();
+
gfn = unalias_gfn(kvm, gfn);
slot = __gfn_to_memslot(kvm, gfn);
if (!slot) {
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 09/55] KVM: Add kernel-internal memory slots

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Reserve a few memory slots for kernel internal use.  This is useful when
you have to register a memory region and want to be sure it was not
registered from userspace, and when you want to register a memory region
that won't be seen from userspace.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |6 +-
 drivers/kvm/kvm_main.c |   22 +-
 2 files changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f3dda08..82c3b13 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -41,6 +41,8 @@
 #define KVM_MAX_VCPUS 4
 #define KVM_ALIAS_SLOTS 4
 #define KVM_MEMORY_SLOTS 8
+/* memory slots that does not exposed to userspace */
+#define KVM_PRIVATE_MEM_SLOTS 4
 #define KVM_PERMILLE_MMU_PAGES 20
 #define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_NUM_MMU_PAGES 1024
@@ -361,7 +363,8 @@ struct kvm {
int naliases;
struct kvm_mem_alias aliases[KVM_ALIAS_SLOTS];
int nmemslots;
-   struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS];
+   struct kvm_memory_slot memslots[KVM_MEMORY_SLOTS +
+   KVM_PRIVATE_MEM_SLOTS];
/*
 * Hash table of struct kvm_mmu_page.
 */
@@ -529,6 +532,7 @@ int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void 
*data,
 int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
 int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
 enum emulation_result {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 16176a2..bc8334b 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -660,7 +660,7 @@ int kvm_set_memory_region(struct kvm *kvm,
goto out;
if (mem->guest_phys_addr & (PAGE_SIZE - 1))
goto out;
-   if (mem->slot >= KVM_MEMORY_SLOTS)
+   if (mem->slot >= KVM_MEMORY_SLOTS + KVM_PRIVATE_MEM_SLOTS)
goto out;
if (mem->guest_phys_addr + mem->memory_size < mem->guest_phys_addr)
goto out;
@@ -797,6 +797,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
  kvm_userspace_memory_region *mem,
  int user_alloc)
 {
+   if (mem->slot >= KVM_MEMORY_SLOTS)
+   return -EINVAL;
return kvm_set_memory_region(kvm, mem, user_alloc);
 }
 
@@ -1010,6 +1012,22 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, 
gfn_t gfn)
return __gfn_to_memslot(kvm, gfn);
 }
 
+int kvm_is_visible_gfn(struct kvm *kvm, gfn_t gfn)
+{
+   int i;
+
+   gfn = unalias_gfn(kvm, gfn);
+   for (i = 0; i < KVM_MEMORY_SLOTS; ++i) {
+   struct kvm_memory_slot *memslot = &kvm->memslots[i];
+
+   if (gfn >= memslot->base_gfn
+   && gfn < memslot->base_gfn + memslot->npages)
+   return 1;
+   }
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_is_visible_gfn);
+
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_memory_slot *slot;
@@ -3087,6 +3105,8 @@ static struct page *kvm_vm_nopage(struct vm_area_struct 
*vma,
struct page *page;
 
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
+   if (!kvm_is_visible_gfn(kvm, pgoff))
+   return NOPAGE_SIGBUS;
page = gfn_to_page(kvm, pgoff);
if (is_error_page(page)) {
kvm_release_page(page);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 13/55] KVM: SVM: Let gcc to choose which registers to save (x86_64)

2007-12-26 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

This patch lets GCC to determine which registers to save when we
switch to/from a VCPU in the case of AMD x86_64.

* Original code saves following registers:

rbx, rcx, rdx, rsi, rdi, rbp,
r8, r9, r10, r11, r12, r13, r14, r15

* Patched code:

  - informs GCC that we modify following registers
using the clobber description:

rbx, rcx, rdx, rsi, rdi
r8, r9, r10, r11, r12, r13, r14, r15

  - rbp is saved (pop/push) because GCC seems to ignore its use in the clobber
description.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/svm.c |   17 -
 1 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 56d1ad6..1298bde 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1499,10 +1499,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 
asm volatile (
 #ifdef CONFIG_X86_64
-   "push %%rbx; push %%rcx; push %%rdx;"
-   "push %%rsi; push %%rdi; push %%rbp;"
-   "push %%r8;  push %%r9;  push %%r10; push %%r11;"
-   "push %%r12; push %%r13; push %%r14; push %%r15;"
+   "push %%rbp; \n\t"
 #else
"push %%ebx; push %%ecx; push %%edx;"
"push %%esi; push %%edi; push %%ebp;"
@@ -1567,10 +1564,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
"mov %%r14, %c[r14](%[svm]) \n\t"
"mov %%r15, %c[r15](%[svm]) \n\t"
 
-   "pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-   "pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-   "pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-   "pop  %%rdx; pop  %%rcx; pop  %%rbx; \n\t"
+   "pop  %%rbp; \n\t"
 #else
"mov %%ebx, %c[rbx](%[svm]) \n\t"
"mov %%ecx, %c[rcx](%[svm]) \n\t"
@@ -1601,7 +1595,12 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
  [r14]"i"(offsetof(struct vcpu_svm, vcpu.regs[VCPU_REGS_R14])),
  [r15]"i"(offsetof(struct vcpu_svm, vcpu.regs[VCPU_REGS_R15]))
 #endif
-   : "cc", "memory");
+   : "cc", "memory"
+#ifdef CONFIG_X86_64
+   , "rbx", "rcx", "rdx", "rsi", "rdi"
+   , "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#endif
+   );
 
if ((svm->vmcb->save.dr7 & 0xff))
load_db_regs(svm->host_db_regs);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 08/55] KVM: Export memory slot allocation mechanism

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Remove kvm memory slot allocation mechanism from the ioctl
and put it to exported function.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |3 +++
 drivers/kvm/kvm_main.c |   17 +
 2 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 12de42c..f3dda08 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -513,6 +513,9 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 extern struct page *bad_page;
 
 int is_error_page(struct page *page);
+int kvm_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 void kvm_release_page(struct page *page);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 0e8f77d..16176a2 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -643,10 +643,9 @@ EXPORT_SYMBOL_GPL(fx_init);
  *
  * Discontiguous memory is allowed, mostly for framebuffers.
  */
-static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct
- kvm_userspace_memory_region *mem,
- int user_alloc)
+int kvm_set_memory_region(struct kvm *kvm,
+ struct kvm_userspace_memory_region *mem,
+ int user_alloc)
 {
int r;
gfn_t base_gfn;
@@ -789,6 +788,16 @@ out_unlock:
kvm_free_physmem_slot(&new, &old);
 out:
return r;
+
+}
+EXPORT_SYMBOL_GPL(kvm_set_memory_region);
+
+static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
+ struct
+ kvm_userspace_memory_region *mem,
+ int user_alloc)
+{
+   return kvm_set_memory_region(kvm, mem, user_alloc);
 }
 
 static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 11/55] KVM: VMX: Let gcc to choose which registers to save (x86_64)

2007-12-26 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

This patch lets GCC to determine which registers to save when we
switch to/from a VCPU in the case of intel x86_64.

* Original code saves following registers:

rax, rbx, rcx, rdx, rsi, rdi, rbp,
r8, r9, r10, r11, r12, r13, r14, r15

* Patched code:

  - informs GCC that we modify following registers
using the clobber description:

rbx, rdi, rsi,
r8, r9, r10, r11, r12, r13, r14, r15

  - doesn't save rax because it is an output operand (vmx->fail)

  - cannot put rcx in clobber description because it is an input operand,
but as we modify it and we want to keep its value (vcpu), we must
save it (pop/push)

  - rbp is saved (pop/push) because GCC seems to ignore its use in the clobber
description.

  - rdx is saved (pop/push) because it is reserved by GCC (REGPARM) and
cannot be put in the clobber description.

  - line "mov (%%rsp), %3 \n\t" has been removed because %3
is rcx and rcx is restored just after.

  - line ASM_VMX_VMWRITE_RSP_RDX() is moved out of the ifdef/else/endif

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   21 +
 1 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 50c151e..3a58a2a 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -2265,16 +2265,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
asm(
/* Store host registers */
 #ifdef CONFIG_X86_64
-   "push %%rax; push %%rbx; push %%rdx;"
-   "push %%rsi; push %%rdi; push %%rbp;"
-   "push %%r8;  push %%r9;  push %%r10; push %%r11;"
-   "push %%r12; push %%r13; push %%r14; push %%r15;"
+   "push %%rdx; push %%rbp;"
"push %%rcx \n\t"
-   ASM_VMX_VMWRITE_RSP_RDX "\n\t"
 #else
"pusha; push %%ecx \n\t"
-   ASM_VMX_VMWRITE_RSP_RDX "\n\t"
 #endif
+   ASM_VMX_VMWRITE_RSP_RDX "\n\t"
/* Check if vmlaunch of vmresume is needed */
"cmp $0, %1 \n\t"
/* Load guest registers.  Don't clobber flags. */
@@ -2333,12 +2329,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
"mov %%r15, %c[r15](%3) \n\t"
"mov %%cr2, %%rax   \n\t"
"mov %%rax, %c[cr2](%3) \n\t"
-   "mov (%%rsp), %3 \n\t"
 
-   "pop  %%rcx; pop  %%r15; pop  %%r14; pop  %%r13; pop  %%r12;"
-   "pop  %%r11; pop  %%r10; pop  %%r9;  pop  %%r8;"
-   "pop  %%rbp; pop  %%rdi; pop  %%rsi;"
-   "pop  %%rdx; pop  %%rbx; pop  %%rax \n\t"
+   "pop  %%rcx; pop  %%rbp; pop  %%rdx \n\t"
 #else
"xchg %3, (%%esp) \n\t"
"mov %%eax, %c[rax](%3) \n\t"
@@ -2376,7 +2368,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
[r15]"i"(offsetof(struct kvm_vcpu, regs[VCPU_REGS_R15])),
 #endif
[cr2]"i"(offsetof(struct kvm_vcpu, cr2))
- : "cc", "memory");
+ : "cc", "memory"
+#ifdef CONFIG_X86_64
+   , "rbx", "rdi", "rsi"
+   , "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"
+#endif
+ );
 
vcpu->interrupt_window_open =
(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & 3) == 0;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 14/55] KVM: SVM: Let gcc to choose which registers to save (i386)

2007-12-26 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

This patch lets GCC to determine which registers to save when we
switch to/from a VCPU in the case of AMD i386

* Original code saves following registers:

ebx, ecx, edx, esi, edi, ebp

* Patched code:

  - informs GCC that we modify following registers
using the clobber description:

ebx, ecx, edx, esi, edi

  - rbp is saved (pop/push) because GCC seems to ignore its use in the clobber
description.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/svm.c |8 
 1 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 1298bde..cdd4fd2 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1501,8 +1501,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 #ifdef CONFIG_X86_64
"push %%rbp; \n\t"
 #else
-   "push %%ebx; push %%ecx; push %%edx;"
-   "push %%esi; push %%edi; push %%ebp;"
+   "push %%ebp; \n\t"
 #endif
 
 #ifdef CONFIG_X86_64
@@ -1573,8 +1572,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
"mov %%edi, %c[rdi](%[svm]) \n\t"
"mov %%ebp, %c[rbp](%[svm]) \n\t"
 
-   "pop  %%ebp; pop  %%edi; pop  %%esi;"
-   "pop  %%edx; pop  %%ecx; pop  %%ebx; \n\t"
+   "pop  %%ebp; \n\t"
 #endif
:
: [svm]"a"(svm),
@@ -1599,6 +1597,8 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
 #ifdef CONFIG_X86_64
, "rbx", "rcx", "rdx", "rsi", "rdi"
, "r8", "r9", "r10", "r11" , "r12", "r13", "r14", "r15"
+#else
+   , "ebx", "ecx", "edx" , "esi", "edi"
 #endif
);
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 00/55] KVM patch queue review for 2.6.25 merge window (part II)

2007-12-26 Thread Avi Kivity
The second 2.6.25 kvm patch series, for your review.  Three more to go.

-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 07/55] KVM: Unmap kernel-allocated memory on slot destruction

2007-12-26 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

kvm_vm_ioctl_set_memory_region() is able to remove memory in addition to
adding it.  Therefore when using kernel swapping support for old userspaces,
we need to munmap the memory if the user request to remove it

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |1 +
 drivers/kvm/kvm_main.c |   14 ++
 2 files changed, 15 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index f7181a4..12de42c 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -353,6 +353,7 @@ struct kvm_memory_slot {
unsigned long *rmap;
unsigned long *dirty_bitmap;
unsigned long userspace_addr;
+   int user_alloc;
 };
 
 struct kvm {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 005f2d5..0e8f77d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -713,6 +713,7 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 
memset(new.rmap, 0, npages * sizeof(*new.rmap));
 
+   new.user_alloc = user_alloc;
if (user_alloc)
new.userspace_addr = mem->userspace_addr;
else {
@@ -727,6 +728,19 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
if (IS_ERR((void *)new.userspace_addr))
goto out_unlock;
}
+   } else {
+   if (!old.user_alloc && old.rmap) {
+   int ret;
+
+   down_write(&current->mm->mmap_sem);
+   ret = do_munmap(current->mm, old.userspace_addr,
+   old.npages * PAGE_SIZE);
+   up_write(&current->mm->mmap_sem);
+   if (ret < 0)
+   printk(KERN_WARNING
+  "kvm_vm_ioctl_set_memory_region: "
+  "failed to munmap memory\n");
+   }
}
 
/* Allocate page dirty bitmap if needed */
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 06/55] KVM: Per-architecture hypercall definitions

2007-12-26 Thread Avi Kivity
From: Christian Borntraeger <[EMAIL PROTECTED]>

Currently kvm provides hypercalls only for x86* architectures. To
provide hypercall infrastructure for other kvm architectures I split
kvm_para.h into a generic header file and architecture specific
definitions.

Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 include/asm-x86/kvm_para.h |  105 
 include/linux/kvm_para.h   |  105 +---
 2 files changed, 117 insertions(+), 93 deletions(-)
 create mode 100644 include/asm-x86/kvm_para.h

diff --git a/include/asm-x86/kvm_para.h b/include/asm-x86/kvm_para.h
new file mode 100644
index 000..c6f3fd8
--- /dev/null
+++ b/include/asm-x86/kvm_para.h
@@ -0,0 +1,105 @@
+#ifndef __X86_KVM_PARA_H
+#define __X86_KVM_PARA_H
+
+/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
+ * should be used to determine that a VM is running under KVM.
+ */
+#define KVM_CPUID_SIGNATURE	0x40000000
+
+/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
+ * paravirtualization, the appropriate feature bit should be checked.
+ */
+#define KVM_CPUID_FEATURES	0x40000001
+
+#ifdef __KERNEL__
+#include 
+
+/* This instruction is vmcall.  On non-VT architectures, it will generate a
+ * trap that we will then rewrite to the appropriate instruction.
+ */
+#define KVM_HYPERCALL ".byte 0x0f,0x01,0xc1"
+
+/* For KVM hypercalls, a three-byte sequence of either the vmrun or the vmmrun
+ * instruction.  The hypervisor may replace it with something else but only the
+ * instructions are guaranteed to be supported.
+ *
+ * Up to four arguments may be passed in rbx, rcx, rdx, and rsi respectively.
+ * The hypercall number should be placed in rax and the return value will be
+ * placed in rax.  No other registers will be clobbered unless explicited
+ * noted by the particular hypercall.
+ */
+
+static inline long kvm_hypercall0(unsigned int nr)
+{
+   long ret;
+   asm volatile(KVM_HYPERCALL
+: "=a"(ret)
+: "a"(nr));
+   return ret;
+}
+
+static inline long kvm_hypercall1(unsigned int nr, unsigned long p1)
+{
+   long ret;
+   asm volatile(KVM_HYPERCALL
+: "=a"(ret)
+: "a"(nr), "b"(p1));
+   return ret;
+}
+
+static inline long kvm_hypercall2(unsigned int nr, unsigned long p1,
+ unsigned long p2)
+{
+   long ret;
+   asm volatile(KVM_HYPERCALL
+: "=a"(ret)
+: "a"(nr), "b"(p1), "c"(p2));
+   return ret;
+}
+
+static inline long kvm_hypercall3(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3)
+{
+   long ret;
+   asm volatile(KVM_HYPERCALL
+: "=a"(ret)
+: "a"(nr), "b"(p1), "c"(p2), "d"(p3));
+   return ret;
+}
+
+static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
+ unsigned long p2, unsigned long p3,
+ unsigned long p4)
+{
+   long ret;
+   asm volatile(KVM_HYPERCALL
+: "=a"(ret)
+: "a"(nr), "b"(p1), "c"(p2), "d"(p3), "S"(p4));
+   return ret;
+}
+
+static inline int kvm_para_available(void)
+{
+   unsigned int eax, ebx, ecx, edx;
+   char signature[13];
+
+   cpuid(KVM_CPUID_SIGNATURE, &eax, &ebx, &ecx, &edx);
+   memcpy(signature + 0, &ebx, 4);
+   memcpy(signature + 4, &ecx, 4);
+   memcpy(signature + 8, &edx, 4);
+   signature[12] = 0;
+
+   if (strcmp(signature, "KVMKVMKVM") == 0)
+   return 1;
+
+   return 0;
+}
+
+static inline unsigned int kvm_arch_para_features(void)
+{
+   return cpuid_eax(KVM_CPUID_FEATURES);
+}
+
+#endif
+
+#endif
diff --git a/include/linux/kvm_para.h b/include/linux/kvm_para.h
index cc5dfb4..e4db25f 100644
--- a/include/linux/kvm_para.h
+++ b/include/linux/kvm_para.h
@@ -1,110 +1,29 @@
 #ifndef __LINUX_KVM_PARA_H
 #define __LINUX_KVM_PARA_H
 
-/* This CPUID returns the signature 'KVMKVMKVM' in ebx, ecx, and edx.  It
- * should be used to determine that a VM is running under KVM.
+/*
+ * This header file provides a method for making a hypercall to the host
+ * Architectures should define:
+ * - kvm_hypercall0, kvm_hypercall1...
+ * - kvm_arch_para_features
+ * - kvm_para_available
  */
-#define KVM_CPUID_SIGNATURE	0x40000000
-
-/* This CPUID returns a feature bitmap in eax.  Before enabling a particular
- * paravirtualization, the appropriate feature bit should be checked.
-

[kvm-devel] [PATCH 01/55] KVM: Portability: Split kvm_vcpu into arch dependent and independent parts (part 1)

2007-12-26 Thread Avi Kivity
From: Zhang Xiantao <[EMAIL PROTECTED]>

First step to split kvm_vcpu.  Currently, we just use an macro to define
the common fields in kvm_vcpu for all archs, and all archs need to define
its own kvm_vcpu struct.

Signed-off-by: Zhang Xiantao <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/ioapic.c  |2 +
 drivers/kvm/irq.c |1 +
 drivers/kvm/kvm.h |  154 -
 drivers/kvm/kvm_main.c|4 +-
 drivers/kvm/lapic.c   |2 +
 drivers/kvm/mmu.c |1 +
 drivers/kvm/svm.c |2 +-
 drivers/kvm/vmx.c |1 +
 drivers/kvm/x86.h |  117 ++
 drivers/kvm/x86_emulate.c |1 +
 10 files changed, 155 insertions(+), 130 deletions(-)

diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
index 8503d99..e14b7c7 100644
--- a/drivers/kvm/ioapic.c
+++ b/drivers/kvm/ioapic.c
@@ -27,6 +27,8 @@
  */
 
 #include "kvm.h"
+#include "x86.h"
+
 #include 
 #include 
 #include 
diff --git a/drivers/kvm/irq.c b/drivers/kvm/irq.c
index 7628c7f..59b47c5 100644
--- a/drivers/kvm/irq.c
+++ b/drivers/kvm/irq.c
@@ -22,6 +22,7 @@
 #include 
 
 #include "kvm.h"
+#include "x86.h"
 #include "irq.h"
 
 /*
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index eb006ed..db18d27 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -308,93 +308,37 @@ struct kvm_io_device *kvm_io_bus_find_dev(struct 
kvm_io_bus *bus, gpa_t addr);
 void kvm_io_bus_register_dev(struct kvm_io_bus *bus,
 struct kvm_io_device *dev);
 
-struct kvm_vcpu {
-   struct kvm *kvm;
-   struct preempt_notifier preempt_notifier;
-   int vcpu_id;
-   struct mutex mutex;
-   int   cpu;
-   u64 host_tsc;
-   struct kvm_run *run;
-   int interrupt_window_open;
-   int guest_mode;
-   unsigned long requests;
-   unsigned long irq_summary; /* bit vector: 1 per word in irq_pending */
-   DECLARE_BITMAP(irq_pending, KVM_NR_INTERRUPTS);
-   unsigned long regs[NR_VCPU_REGS]; /* for rsp: vcpu_load_rsp_rip() */
-   unsigned long rip;  /* needs vcpu_load_rsp_rip() */
-
-   unsigned long cr0;
-   unsigned long cr2;
-   unsigned long cr3;
-   unsigned long cr4;
-   unsigned long cr8;
-   u64 pdptrs[4]; /* pae */
-   u64 shadow_efer;
-   u64 apic_base;
-   struct kvm_lapic *apic;/* kernel irqchip context */
-#define VCPU_MP_STATE_RUNNABLE  0
-#define VCPU_MP_STATE_UNINITIALIZED 1
-#define VCPU_MP_STATE_INIT_RECEIVED 2
-#define VCPU_MP_STATE_SIPI_RECEIVED 3
-#define VCPU_MP_STATE_HALTED4
-   int mp_state;
-   int sipi_vector;
-   u64 ia32_misc_enable_msr;
-
-   struct kvm_mmu mmu;
-
-   struct kvm_mmu_memory_cache mmu_pte_chain_cache;
-   struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
-   struct kvm_mmu_memory_cache mmu_page_cache;
-   struct kvm_mmu_memory_cache mmu_page_header_cache;
-
-   gfn_t last_pt_write_gfn;
-   int   last_pt_write_count;
-   u64  *last_pte_updated;
-
-   struct kvm_guest_debug guest_debug;
-
-   struct i387_fxsave_struct host_fx_image;
-   struct i387_fxsave_struct guest_fx_image;
-   int fpu_active;
-   int guest_fpu_loaded;
-
-   int mmio_needed;
-   int mmio_read_completed;
-   int mmio_is_write;
-   int mmio_size;
-   unsigned char mmio_data[8];
+#ifdef CONFIG_HAS_IOMEM
+#define KVM_VCPU_MMIO  \
+   int mmio_needed;\
+   int mmio_read_completed;\
+   int mmio_is_write;  \
+   int mmio_size;  \
+   unsigned char mmio_data[8]; \
gpa_t mmio_phys_addr;
-   gva_t mmio_fault_cr2;
-   struct kvm_pio_request pio;
-   void *pio_data;
-   wait_queue_head_t wq;
 
-   int sigset_active;
-   sigset_t sigset;
+#else
+#define KVM_VCPU_MMIO
 
-   struct kvm_stat stat;
+#endif
 
-   struct {
-   int active;
-   u8 save_iopl;
-   struct kvm_save_segment {
-   u16 selector;
-   unsigned long base;
-   u32 limit;
-   u32 ar;
-   } tr, es, ds, fs, gs;
-   } rmode;
-   int halt_request; /* real mode on Intel only */
-
-   int cpuid_nent;
-   struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
-
-   /* emulate context */
-
-   struct x86_emulate_ctxt emulate_ctxt;
-};
+#define KVM_VCPU_COMM  \
+   struct kvm *kvm;\
+   struct preempt_notifier preempt_notifier;   \
+   int vcpu_id;\
+   struct mutex mutex; \
+   int   cpu

[kvm-devel] [PATCH 04/55] KVM: Export PIC reset for kernel device reset

2007-12-26 Thread Avi Kivity
From: Eddie Dong <[EMAIL PROTECTED]>

Signed-off-by: Yaozu (Eddie) Dong <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/i8259.c |6 ++
 drivers/kvm/irq.h   |1 +
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/kvm/i8259.c b/drivers/kvm/i8259.c
index a679157..f0dc2ee 100644
--- a/drivers/kvm/i8259.c
+++ b/drivers/kvm/i8259.c
@@ -181,10 +181,8 @@ int kvm_pic_read_irq(struct kvm_pic *s)
return intno;
 }
 
-static void pic_reset(void *opaque)
+void kvm_pic_reset(struct kvm_kpic_state *s)
 {
-   struct kvm_kpic_state *s = opaque;
-
s->last_irr = 0;
s->irr = 0;
s->imr = 0;
@@ -209,7 +207,7 @@ static void pic_ioport_write(void *opaque, u32 addr, u32 
val)
addr &= 1;
if (addr == 0) {
if (val & 0x10) {
-   pic_reset(s);   /* init */
+   kvm_pic_reset(s);   /* init */
/*
 * deassert a pending interrupt
 */
diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 508280e..e08ae10 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -139,6 +139,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
 int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
+void kvm_pic_reset(struct kvm_kpic_state *s);
 void kvm_free_lapic(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 02/55] KVM: Move vmx_vcpu_reset() out of vmx_vcpu_setup()

2007-12-26 Thread Avi Kivity
Split guest reset code out of vmx_vcpu_setup().  Besides being cleaner, this
moves the realmode tss setup (which can sleep) outside vmx_vcpu_setup()
(which is executed with preemption enabled).

[izik: remove unused variable]

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |2 +-
 drivers/kvm/kvm_main.c |8 ++-
 drivers/kvm/svm.c  |4 +-
 drivers/kvm/vmx.c  |  178 
 4 files changed, 99 insertions(+), 93 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index db18d27..f7181a4 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -412,7 +412,7 @@ struct kvm_x86_ops {
/* Create, but do not attach this VCPU */
struct kvm_vcpu *(*vcpu_create)(struct kvm *kvm, unsigned id);
void (*vcpu_free)(struct kvm_vcpu *vcpu);
-   void (*vcpu_reset)(struct kvm_vcpu *vcpu);
+   int (*vcpu_reset)(struct kvm_vcpu *vcpu);
 
void (*prepare_guest_switch)(struct kvm_vcpu *vcpu);
void (*vcpu_load)(struct kvm_vcpu *vcpu, int cpu);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 41d4a93..ff77175 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2122,7 +2122,9 @@ static int __vcpu_run(struct kvm_vcpu *vcpu, struct 
kvm_run *kvm_run)
pr_debug("vcpu %d received sipi with vector # %x\n",
   vcpu->vcpu_id, vcpu->sipi_vector);
kvm_lapic_reset(vcpu);
-   kvm_x86_ops->vcpu_reset(vcpu);
+   r = kvm_x86_ops->vcpu_reset(vcpu);
+   if (r)
+   return r;
vcpu->mp_state = VCPU_MP_STATE_RUNNABLE;
}
 
@@ -2637,7 +2639,9 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, int 
n)
BUG_ON((unsigned long)&vcpu->host_fx_image & 0xF);
 
vcpu_load(vcpu);
-   r = kvm_mmu_setup(vcpu);
+   r = kvm_x86_ops->vcpu_reset(vcpu);
+   if (r == 0)
+   r = kvm_mmu_setup(vcpu);
vcpu_put(vcpu);
if (r < 0)
goto free_vcpu;
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 09c4b14..4a70168 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -557,7 +557,7 @@ static void init_vmcb(struct vmcb *vmcb)
/* rdx = ?? */
 }
 
-static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
+static int svm_vcpu_reset(struct kvm_vcpu *vcpu)
 {
struct vcpu_svm *svm = to_svm(vcpu);
 
@@ -568,6 +568,8 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu)
svm->vmcb->save.cs.base = svm->vcpu.sipi_vector << 12;
svm->vmcb->save.cs.selector = svm->vcpu.sipi_vector << 8;
}
+
+   return 0;
 }
 
 static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, unsigned int id)
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 828d3cb..77083e4 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -1433,92 +1433,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
unsigned long a;
struct descriptor_table dt;
int i;
-   int ret = 0;
unsigned long kvm_vmx_return;
-   u64 msr;
u32 exec_control;
 
-   if (!init_rmode_tss(vmx->vcpu.kvm)) {
-   ret = -ENOMEM;
-   goto out;
-   }
-
-   vmx->vcpu.rmode.active = 0;
-
-   vmx->vcpu.regs[VCPU_REGS_RDX] = get_rdx_init_val();
-   set_cr8(&vmx->vcpu, 0);
-   msr = 0xfee00000 | MSR_IA32_APICBASE_ENABLE;
-   if (vmx->vcpu.vcpu_id == 0)
-   msr |= MSR_IA32_APICBASE_BSP;
-   kvm_set_apic_base(&vmx->vcpu, msr);
-
-   fx_init(&vmx->vcpu);
-
-   /*
-* GUEST_CS_BASE should really be 0xffff0000, but VT vm86 mode
-* insists on having GUEST_CS_BASE == GUEST_CS_SELECTOR << 4.  Sigh.
-*/
-   if (vmx->vcpu.vcpu_id == 0) {
-   vmcs_write16(GUEST_CS_SELECTOR, 0xf000);
-   vmcs_writel(GUEST_CS_BASE, 0x000f0000);
-   } else {
-   vmcs_write16(GUEST_CS_SELECTOR, vmx->vcpu.sipi_vector << 8);
-   vmcs_writel(GUEST_CS_BASE, vmx->vcpu.sipi_vector << 12);
-   }
-   vmcs_write32(GUEST_CS_LIMIT, 0xffff);
-   vmcs_write32(GUEST_CS_AR_BYTES, 0x9b);
-
-   seg_setup(VCPU_SREG_DS);
-   seg_setup(VCPU_SREG_ES);
-   seg_setup(VCPU_SREG_FS);
-   seg_setup(VCPU_SREG_GS);
-   seg_setup(VCPU_SREG_SS);
-
-   vmcs_write16(GUEST_TR_SELECTOR, 0);
-   vmcs_writel(GUEST_TR_BASE, 0);
-   vmcs_write32(GUEST_TR_LIMIT, 0xffff);
-   vmcs_write32(GUEST_TR_AR_BYTES, 0x008b);
-
-   vmcs_write16(GUEST_LDTR_SELECTOR, 0);
-   vmcs_writel(GUEST_LDTR_BASE, 0);
-   vmcs_write32(GUEST_LDTR_LIMIT, 0xffff);
-   vmcs_write32(GUEST_LDTR_AR_BYTES, 0x00082);
-
-   vmcs_write32(GUEST_SYSENTER_CS, 0);
-   vmcs_writel(GUEST_SYSENTER_ESP, 0);
-   vmcs

Re: [kvm-devel] external module sched_in event

2007-12-25 Thread Avi Kivity
Andrea Arcangeli wrote:
> On Sun, Dec 23, 2007 at 07:37:40PM +0200, Avi Kivity wrote:
>   
>> The sched_in notifier needs to enable interrupts (but it must disable 
>> preemption to avoid recursion).
>> 
>
> Ok this update fixes the smp_call_function deadlock.
>
>   

I was able to boot a 4-way guest on a 2-way i386 host, so looks like
this works.  Thanks, it will reduce the maintenance burden needed for
the external module.

-- 
Do not meddle in the internals of kernels, for they are subtle and quick to 
panic.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] Find timing info between two VM exits

2007-12-24 Thread Avi Kivity
avadh patel wrote:
> Hi Avi,
>
> Sorry for the wrong question. My revised question is as follows:
> How can I find the time spent between vm-enter and vm-exit ?
> In other words, How can I find the number of clock ticks spent in 
> execution of VM's code between vm-enter and vm-exit ?

Linux 2.6.24 supports guest time counters in /proc/pid/stat; these sum 
up the amount of real time spent inside the guest.  To get clock tick 
counts, you can add rdtsc calls in vmx_vcpu_run or svm_vcpu_run, before 
and after the big asm block.


-- 
Any sufficiently difficult bug is indistinguishable from a feature.


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 3/5] KVM: add kvm_follow_page()

2007-12-24 Thread Avi Kivity
Marcelo Tosatti wrote:
>
>> I believe that a new get_users_pages_inatomic() is more suitable; 
>> Andrew, I'll write it if you agree.  Alternatively, walk_page_range() 
>> should take the lock itself, otherwise it is only usable if you don't 
>> care about correctness?
>> 
>
> Why not just export follow_page() ? That is exactly what KVM needs.
>
>   

Well, follow_page() needs the vma, and doesn't support large pages.  
It's too low-level to be a public API.

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 3/5] KVM: add kvm_follow_page()

2007-12-24 Thread Avi Kivity
Marcelo Tosatti wrote:

 

 btw, the call to gfn_to_page() can happen in page_fault() instead of
 walk_addr(); that will reduce the amount of error handling, and will
 simplify the callers to walk_addr() that don't need the page.

 
>>> But the gfn in question is only known at walk_addr() time, so thats not
>>> possible.
>>>  
>>>   
>> It's just walker->gfn; certainly it is known in page_fault().
>> 
>
> Oh, you mean to grab walker->gfn before fetch() ?
>
>   

Yes.


-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] Find timing info between two VM exits

2007-12-23 Thread Avi Kivity
avadh patel wrote:
> Hi All,
>
> I was looking into the source code of kvm and modified qemu and trying 
> to find the time spent between two VM-exit in kvm.
> Is there a way to find this from user-space qemu, without modifying 
> the kernel module or libkvm?
>

What do you mean exactly?  Average time between vmexits?  Time between 
vmexit and vmenter?  time between vmenter and vmexit?  Or what code 
executes?

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH] Add support >3G memory for ia64 guests.

2007-12-23 Thread Avi Kivity
Zhang, Xiantao wrote:
> From: Zhang Xiantao <[EMAIL PROTECTED]>
> Date: Mon, 24 Dec 2007 14:06:31 +0800
> Subject: [PATCH] kvm: qemu/ia64: guest with > 3G memory fix.
>
> This patch fix boot issue with >3G memory. Currently, set
> TARGET_PHYS_ADDR_SPACE_BITS to 36 bits, and safely support
> <64G memory for guests.
>   
Applied, thanks.

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 3/5] KVM: add kvm_follow_page()

2007-12-23 Thread Avi Kivity
Marcelo Tosatti wrote:
> It can acquire the pagetablelock in the callback handler. But then,
> vm_normal_page() must also be exported.
>
> Are you guys OK with this ?
>
>   

Seems to me that requires fairly detailed mucking in mm details, just to 
get at a page.

I believe that a new get_users_pages_inatomic() is more suitable; 
Andrew, I'll write it if you agree.  Alternatively, walk_page_range() 
should take the lock itself, otherwise it is only usable if you don't 
care about correctness?

>  
> +static int kvm_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
> +void *private)
> +{
> + struct page **page = private;
> + struct vm_area_struct *vma;
> + pte_t *ptep, pte;
> + spinlock_t *ptl;
> + int err = -EFAULT;
> +
> + vma = find_vma(current->mm, addr);
> + if (!vma)
> + return err;
> +
> + ptep = pte_offset_map_lock(current->mm, pmd, addr, &ptl);
> + pte = *ptep;
> + if (!pte_present(pte))
> + goto unlock;
> +
> + *page = vm_normal_page(vma, addr, pte);
> + if (!*page)
> + goto unlock;
> +
> + get_page(*page);
> + err = 0;
> +unlock:
> + pte_unmap_unlock(ptep, ptl);
> + return err;
> +}
> +
>   

See what one has to write to get at a page?  Unreasonable IMO.  And 
that's without hugetlb support AFAICT (kvm will want that soon).

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [patch 3/5] KVM: add kvm_follow_page()

2007-12-23 Thread Avi Kivity
Marcelo Tosatti wrote:
> On Sun, Dec 23, 2007 at 10:41:07AM +0200, Avi Kivity wrote:
>   
>> Avi Kivity wrote:
>> 
>>> Exactly.  But it is better to be explicit about it and pass the page
>>> directly like you did before.  I hate to make you go back-and-fourth,
>>> but I did not understand the issue completely before.
>>>
>>>   
>> btw, the call to gfn_to_page() can happen in page_fault() instead of
>> walk_addr(); that will reduce the amount of error handling, and will
>> simplify the callers to walk_addr() that don't need the page.
>> 
>
> But the gfn in question is only known at walk_addr() time, so thats not
> possible.
>   

It's just walker->gfn; certainly it is known in page_fault().

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 27/50] KVM: Support assigning userspace memory to the guest

2007-12-23 Thread Avi Kivity
Avi Kivity wrote:
> From: Izik Eidus <[EMAIL PROTECTED]>
>
> Instead of having the kernel allocate memory to the guest, let userspace
> allocate it and pass the address to the kernel.
>
> This is required for s390 support, but also enables features like memory
> sharing and using hugetlbfs backed memory.
>
>   

[...]

> @@ -728,11 +752,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm 
> *kvm,
>  
>   memset(new.phys_mem, 0, npages * sizeof(struct page *));
>   memset(new.rmap, 0, npages * sizeof(*new.rmap));
> - for (i = 0; i < npages; ++i) {
> - new.phys_mem[i] = alloc_page(GFP_HIGHUSER
> -  | __GFP_ZERO);
> - if (!new.phys_mem[i])
> + if (user_alloc) {
> + unsigned long pages_num;
> +
> + new.user_alloc = 1;
> + down_read(&current->mm->mmap_sem);
> +
> + pages_num = get_user_pages(current, current->mm,
> +mem->userspace_addr,
> +npages, 1, 0, new.phys_mem,
> +NULL);
> +
>   

I just combined a patch that changes the 'force' parameter to 
get_user_pages from 0 to 1, into this patch, to avoid introducing a bug 
and its fix in the same patchset.  I won't be resending this patch since 
the change is too trivial.  Same change applies to patch 48, "KVM: MMU: 
Partial swapping of guest memory".

-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] external module sched_in event

2007-12-23 Thread Avi Kivity
Andrea Arcangeli wrote:
> On Sat, Dec 22, 2007 at 09:13:44PM +0200, Avi Kivity wrote:
>   
>> Unfortunately, this fails badly on Intel i386:
>> 
>
> Hmm ok there's a definitive bug that I forgot a int1 kernel->kernel
> switch on x86 has no special debug stack like on x86-64. This will
> have a better chance to work, hope I got all offsets right by
> memory At least the offset "32" in the leal and eax + fastcall
> should all be right or I doubt it could survive the double
> dereferencing. Likely the one-more-derefence didn't oops there because
> you likely have >=1g of ram and there was a 25% chance of crashing due
> the lack of sched-in and 75% chance of crashing in the
> one-more-dereference in a more meaningful way.
>
>   

Now I see lots of

> BUG: warning at arch/i386/kernel/smp.c:701/smp_call_function_single() 
> (Not tainted)
>  [] __vcpu_clear+0x0/0x4a [kvm_intel]
>  [] smp_call_function_single+0x90/0x10c
>  [] __switch_to+0x174/0x18e
>  [] vcpu_clear+0x41/0x50 [kvm_intel]
>  [] vmx_vcpu_load+0x2e/0x103 [kvm_intel]
>  [] vmx_vcpu_put+0xc0/0xf3 [kvm_intel]
>  [] kvm_arch_vcpu_load+0x9/0xa [kvm]
>  [] preempt_notifier_trigger+0x5b/0xe1 [kvm]
>  [] pn_int1_handler+0x16/0x26 [kvm]
>  [] __mutex_lock_slowpath+0x45/0x77
>  [] mutex_lock+0x26/0x29
>  [] apic_update_ppr+0x17/0x3e [kvm]
>  [] kvm_mmu_page_fault+0x14/0x9b [kvm]
>  [] kvm_get_apic_interrupt+0x3a/0x4f [kvm]
>  [] kvm_handle_exit+0x6a/0x86 [kvm_intel]
>  [] kvm_arch_vcpu_ioctl_run+0x2a4/0x3aa [kvm]
>  [] kvm_vcpu_ioctl+0xce/0x298 [kvm]
>  [] __activate_task+0x1c/0x29
>  [] try_to_wake_up+0x3aa/0x3b4
>  [] _spin_unlock_irq+0x5/0x7
>  [] __wake_up_common+0x32/0x55
>  [] __wake_up+0x32/0x43
>  [] wake_futex+0x42/0x4c
>  [] futex_wake+0xa6/0xb0
>  [] do_futex+0x217/0xb7d
>  [] journal_stop+0x1cb/0x1d7 [jbd]
>  [] mapping_tagged+0x2b/0x32
>  [] kvm_vm_ioctl+0x172/0x183 [kvm]
>  [] _spin_unlock_irq+0x5/0x7
>  [] __sched_text_start+0x999/0xa21
>  [] smp_apic_timer_interrupt+0x76/0x80
>  [] kvm_vcpu_ioctl+0x0/0x298 [kvm]
>  [] do_ioctl+0x1f/0x62
>  [] vfs_ioctl+0x244/0x256
>  [] sys_ioctl+0x4c/0x64
>  [] syscall_call+0x7/0xb
>  ===

The sched_in notifier needs to enable interrupts (but it must disable 
preemption to avoid recursion).

Eventually I got this:

BUG: spinlock lockup on CPU#3, qemu-system-x86/4425, c07001cc (Not tainted)
 [] __vcpu_clear+0x0/0x4a [kvm_intel]
 [] _raw_spin_lock+0xb8/0xd9
 [] smp_call_function_single+0x9a/0x10c
 [] __switch_to+0x174/0x18e
 [] vcpu_clear+0x41/0x50 [kvm_intel]
 [] vmx_vcpu_load+0x2e/0x103 [kvm_intel]
 [] vmx_vcpu_put+0xc0/0xf3 [kvm_intel]
 [] kvm_arch_vcpu_load+0x9/0xa [kvm]
 [] preempt_notifier_trigger+0x5b/0xe1 [kvm]
 [] pn_int1_handler+0x16/0x26 [kvm]
 [] __mutex_lock_slowpath+0x45/0x77
 [] mutex_lock+0x26/0x29
 [] apic_update_ppr+0x17/0x3e [kvm]
 [] kvm_mmu_page_fault+0x14/0x9b [kvm]
 [] kvm_get_apic_interrupt+0x3a/0x4f [kvm]
 [] kvm_handle_exit+0x6a/0x86 [kvm_intel]
 [] kvm_arch_vcpu_ioctl_run+0x2a4/0x3aa [kvm]
 [] tcp_sendmsg+0x913/0xa04
 [] kvm_vcpu_ioctl+0xce/0x298 [kvm]
 [] __activate_task+0x1c/0x29
 [] try_to_wake_up+0x3aa/0x3b4
 [] _spin_unlock_irq+0x5/0x7
 [] __sched_text_start+0x999/0xa21
 [] core_sys_select+0x218/0x2f3
 [] futex_wake+0xa6/0xb0
 [] do_futex+0x217/0xb7d
 [] __dequeue_signal+0xff/0x14e
 [] dequeue_signal+0x36/0xae
 [] kvm_vm_ioctl+0x172/0x183 [kvm]
 [] ktime_get_ts+0x16/0x44
 [] ktime_get+0x12/0x34
 [] common_timer_get+0xf4/0x130
 [] kvm_vcpu_ioctl+0x0/0x298 [kvm]
 [] do_ioctl+0x1f/0x62
 [] vfs_ioctl+0x244/0x256
 [] copy_to_user+0x3c/0x50
 [] sys_ioctl+0x4c/0x64
 [] syscall_call+0x7/0xb
 ===

followed by lockup of the qemu process, but it may be due to interrupts 
being disabled.


-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 26/50] KVM: CodingStyle cleanup

2007-12-23 Thread Avi Kivity
From: Mike Day <[EMAIL PROTECTED]>

Signed-off-by: Mike D. Day <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |   32 +-
 drivers/kvm/kvm_main.c|   58 ++
 drivers/kvm/lapic.c   |3 +-
 drivers/kvm/mmu.c |   10 +++--
 drivers/kvm/paging_tmpl.h |2 +-
 drivers/kvm/svm.c |   48 +---
 drivers/kvm/svm.h |2 +-
 drivers/kvm/vmx.c |   60 +++
 drivers/kvm/vmx.h |8 ++--
 drivers/kvm/x86_emulate.c |   76 ++---
 10 files changed, 151 insertions(+), 148 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 9f10c37..ec5b498 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -528,7 +528,7 @@ extern struct kvm_x86_ops *kvm_x86_ops;
if (printk_ratelimit()) \
printk(KERN_ERR "kvm: %i: cpu%i " fmt,  \
   current->tgid, (vcpu)->vcpu_id , ## __VA_ARGS__); \
- } while(0)
+ } while (0)
 
 #define kvm_printf(kvm, fmt ...) printk(KERN_DEBUG fmt)
 #define vcpu_printf(vcpu, fmt...) kvm_printf(vcpu->kvm, fmt)
@@ -598,7 +598,7 @@ int kvm_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 
data);
 
 struct x86_emulate_ctxt;
 
-int kvm_emulate_pio (struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
+int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
 int size, unsigned port);
 int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
   int size, unsigned long count, int down,
@@ -607,7 +607,7 @@ void kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
 int kvm_emulate_halt(struct kvm_vcpu *vcpu);
 int emulate_invlpg(struct kvm_vcpu *vcpu, gva_t address);
 int emulate_clts(struct kvm_vcpu *vcpu);
-int emulator_get_dr(struct x86_emulate_ctxt* ctxt, int dr,
+int emulator_get_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long *dest);
 int emulator_set_dr(struct x86_emulate_ctxt *ctxt, int dr,
unsigned long value);
@@ -631,7 +631,7 @@ void kvm_put_guest_fpu(struct kvm_vcpu *vcpu);
 void kvm_flush_remote_tlbs(struct kvm *kvm);
 
 int emulator_read_std(unsigned long addr,
-  void *val,
+ void *val,
  unsigned int bytes,
  struct kvm_vcpu *vcpu);
 int emulator_write_emulated(unsigned long addr,
@@ -721,55 +721,55 @@ static inline struct kvm_mmu_page *page_header(hpa_t 
shadow_page)
 static inline u16 read_fs(void)
 {
u16 seg;
-   asm ("mov %%fs, %0" : "=g"(seg));
+   asm("mov %%fs, %0" : "=g"(seg));
return seg;
 }
 
 static inline u16 read_gs(void)
 {
u16 seg;
-   asm ("mov %%gs, %0" : "=g"(seg));
+   asm("mov %%gs, %0" : "=g"(seg));
return seg;
 }
 
 static inline u16 read_ldt(void)
 {
u16 ldt;
-   asm ("sldt %0" : "=g"(ldt));
+   asm("sldt %0" : "=g"(ldt));
return ldt;
 }
 
 static inline void load_fs(u16 sel)
 {
-   asm ("mov %0, %%fs" : : "rm"(sel));
+   asm("mov %0, %%fs" : : "rm"(sel));
 }
 
 static inline void load_gs(u16 sel)
 {
-   asm ("mov %0, %%gs" : : "rm"(sel));
+   asm("mov %0, %%gs" : : "rm"(sel));
 }
 
 #ifndef load_ldt
 static inline void load_ldt(u16 sel)
 {
-   asm ("lldt %0" : : "rm"(sel));
+   asm("lldt %0" : : "rm"(sel));
 }
 #endif
 
 static inline void get_idt(struct descriptor_table *table)
 {
-   asm ("sidt %0" : "=m"(*table));
+   asm("sidt %0" : "=m"(*table));
 }
 
 static inline void get_gdt(struct descriptor_table *table)
 {
-   asm ("sgdt %0" : "=m"(*table));
+   asm("sgdt %0" : "=m"(*table));
 }
 
 static inline unsigned long read_tr_base(void)
 {
u16 tr;
-   asm ("str %0" : "=g"(tr));
+   asm("str %0" : "=g"(tr));
return segment_base(tr);
 }
 
@@ -785,17 +785,17 @@ static inline unsigned long read_msr(unsigned long msr)
 
 static inline void fx_save(struct i387_fxsave_struct *image)
 {
-   asm ("fxsave (%0)":: "r" (image));
+   asm("fxsave (%0)":: "r" (image));
 }
 
 static inline void fx_restore(struct i387_fxsave_struct *image)
 {
-   asm ("fxrstor (%0)":: "r" (image));
+   asm("fxrstor (%0)":: "r" (image));
 }
 
 static inline void fpu_init(void)
 {
-   asm ("finit");
+   asm("finit");
 }
 

[kvm-devel] [PATCH 37/50] KVM: Portability: split kvm_vcpu_ioctl

2007-12-23 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

This patch splits kvm_vcpu_ioctl into architecture independent parts, and
x86 specific parts which go to kvm_arch_vcpu_ioctl in x86.c.

Common ioctls for all architectures are:
KVM_RUN, KVM_GET/SET_(S-)REGS, KVM_TRANSLATE, KVM_INTERRUPT,
KVM_DEBUG_GUEST, KVM_SET_SIGNAL_MASK, KVM_GET/SET_FPU
Note that some PPC chips don't have an FPU, so we might need an #ifdef
around KVM_GET/SET_FPU one day.

x86 specific ioctls are:
KVM_GET/SET_LAPIC, KVM_SET_CPUID, KVM_GET/SET_MSRS

An interesting aspect is vcpu_load/vcpu_put. We now have a common
vcpu_load/put which does the preemption stuff, and an architecture
specific kvm_arch_vcpu_load/put. In the x86 case, this one calls the
vmx/svm function defined in kvm_x86_ops.

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Reviewed-by: Christian Borntraeger <[EMAIL PROTECTED]>
Reviewed-by: Christian Ehrhardt <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |9 ++
 drivers/kvm/kvm_main.c |  200 ++--
 drivers/kvm/x86.c  |  219 
 3 files changed, 234 insertions(+), 194 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index d56962d..1edf8a5 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -537,6 +537,10 @@ extern struct kvm_x86_ops *kvm_x86_ops;
 int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id);
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu);
 
+void vcpu_load(struct kvm_vcpu *vcpu);
+void vcpu_put(struct kvm_vcpu *vcpu);
+
+
 int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int vcpu_size,
  struct module *module);
 void kvm_exit_x86(void);
@@ -655,6 +659,11 @@ int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
 
 long kvm_arch_dev_ioctl(struct file *filp,
unsigned int ioctl, unsigned long arg);
+long kvm_arch_vcpu_ioctl(struct file *filp,
+unsigned int ioctl, unsigned long arg);
+void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
+void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu);
+
 __init void kvm_arch_init(void);
 
 static inline void kvm_guest_enter(void)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 9f7370f..03d6069 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -90,8 +90,6 @@ static struct kvm_stats_debugfs_item {
 
 static struct dentry *debugfs_dir;
 
-#define MAX_IO_MSRS 256
-
 #define CR0_RESERVED_BITS  \
(~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
  | X86_CR0_ET | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM \
@@ -179,21 +177,21 @@ EXPORT_SYMBOL_GPL(kvm_put_guest_fpu);
 /*
  * Switches to specified vcpu, until a matching vcpu_put()
  */
-static void vcpu_load(struct kvm_vcpu *vcpu)
+void vcpu_load(struct kvm_vcpu *vcpu)
 {
int cpu;
 
mutex_lock(&vcpu->mutex);
cpu = get_cpu();
preempt_notifier_register(&vcpu->preempt_notifier);
-   kvm_x86_ops->vcpu_load(vcpu, cpu);
+   kvm_arch_vcpu_load(vcpu, cpu);
put_cpu();
 }
 
-static void vcpu_put(struct kvm_vcpu *vcpu)
+void vcpu_put(struct kvm_vcpu *vcpu)
 {
preempt_disable();
-   kvm_x86_ops->vcpu_put(vcpu);
+   kvm_arch_vcpu_put(vcpu);
preempt_notifier_unregister(&vcpu->preempt_notifier);
preempt_enable();
mutex_unlock(&vcpu->mutex);
@@ -2509,86 +2507,6 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int 
*db, int *l)
 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
 
 /*
- * Adapt set_msr() to msr_io()'s calling convention
- */
-static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
-{
-   return kvm_set_msr(vcpu, index, *data);
-}
-
-/*
- * Read or write a bunch of msrs. All parameters are kernel addresses.
- *
- * @return number of msrs set successfully.
- */
-static int __msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs,
-   struct kvm_msr_entry *entries,
-   int (*do_msr)(struct kvm_vcpu *vcpu,
- unsigned index, u64 *data))
-{
-   int i;
-
-   vcpu_load(vcpu);
-
-   for (i = 0; i < msrs->nmsrs; ++i)
-   if (do_msr(vcpu, entries[i].index, &entries[i].data))
-   break;
-
-   vcpu_put(vcpu);
-
-   return i;
-}
-
-/*
- * Read or write a bunch of msrs. Parameters are user addresses.
- *
- * @return number of msrs set successfully.
- */
-static int msr_io(struct kvm_vcpu *vcpu, struct kvm_msrs __user *user_msrs,
- int (*do_msr)(struct kvm_vcpu *vcpu,
-   unsigned index, u64 *data),
- int writeback)
-{
-   struct kvm_msrs msrs;
-   struct kvm_msr_entry *entries;
-   int r, n;
-   unsigned size;
-
-   r = -EFAULT;
-   if (

[kvm-devel] [PATCH 20/50] KVM: Remove the usage of page->private field by rmap

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

When kvm uses user-allocated pages in the future for the guest, we won't
be able to use page->private for rmap, since page->rmap is reserved for
the filesystem.  So we move the rmap base pointers to the memory slot.

A side effect of this is that we need to store the gfn of each gpte in
the shadow pages, since the memory slot is addressed by gfn, instead of
hfn like struct page.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |6 ++-
 drivers/kvm/kvm_main.c|   11 +++-
 drivers/kvm/mmu.c |  122 ++---
 drivers/kvm/paging_tmpl.h |3 +-
 4 files changed, 86 insertions(+), 56 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 08ffc82..80cfb99 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -126,6 +126,8 @@ struct kvm_mmu_page {
union kvm_mmu_page_role role;
 
u64 *spt;
+   /* hold the gfn of each spte inside spt */
+   gfn_t *gfns;
unsigned long slot_bitmap; /* One bit set per slot which has memory
* in this shadow page.
*/
@@ -159,7 +161,7 @@ struct kvm_mmu {
u64 *pae_root;
 };
 
-#define KVM_NR_MEM_OBJS 20
+#define KVM_NR_MEM_OBJS 40
 
 struct kvm_mmu_memory_cache {
int nobjs;
@@ -402,6 +404,7 @@ struct kvm_memory_slot {
unsigned long npages;
unsigned long flags;
struct page **phys_mem;
+   unsigned long *rmap;
unsigned long *dirty_bitmap;
 };
 
@@ -554,6 +557,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
 extern hpa_t bad_page_address;
 
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index f7566b9..ac563fc 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -309,6 +309,8 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot 
*free,
__free_page(free->phys_mem[i]);
vfree(free->phys_mem);
}
+   if (!dont || free->rmap != dont->rmap)
+   vfree(free->rmap);
 
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
vfree(free->dirty_bitmap);
@@ -719,13 +721,18 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
if (!new.phys_mem)
goto out_unlock;
 
+   new.rmap = vmalloc(npages * sizeof(struct page*));
+
+   if (!new.rmap)
+   goto out_unlock;
+
memset(new.phys_mem, 0, npages * sizeof(struct page *));
+   memset(new.rmap, 0, npages * sizeof(*new.rmap));
for (i = 0; i < npages; ++i) {
new.phys_mem[i] = alloc_page(GFP_HIGHUSER
 | __GFP_ZERO);
if (!new.phys_mem[i])
goto out_unlock;
-   set_page_private(new.phys_mem[i],0);
}
}
 
@@ -909,7 +916,7 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, struct 
kvm_irqchip *chip)
return r;
 }
 
-static gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
int i;
struct kvm_mem_alias *alias;
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index d347e89..72757db 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -276,7 +276,7 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
   rmap_desc_cache, 1);
if (r)
goto out;
-   r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 4);
+   r = mmu_topup_memory_cache_page(&vcpu->mmu_page_cache, 8);
if (r)
goto out;
r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
@@ -327,35 +327,52 @@ static void mmu_free_rmap_desc(struct kvm_rmap_desc *rd)
 }
 
 /*
+ * Take gfn and return the reverse mapping to it.
+ * Note: gfn must be unaliased before this function get called
+ */
+
+static unsigned long *gfn_to_rmap(struct kvm *kvm, gfn_t gfn)
+{
+   struct kvm_memory_slot *slot;
+
+   slot = gfn_to_memslot(kvm, gfn);
+   return &slot->rmap[gfn - slot->base_gfn];
+}
+
+/*
  * Reverse mapping data structures:
  *
- * If page->private bit zero is zero, then page->private points to the
- * shadow page table entry that points to page_address(page).
+ * If rmapp bit zero is zero, then rmapp point to the shadw page table entry
+ * that points to page_address(page).
  *
- * If page->private bit zero is one,

[kvm-devel] [PATCH 50/50] KVM: Allocate userspace memory for older userspace

2007-12-23 Thread Avi Kivity
From: Anthony Liguori <[EMAIL PROTECTED]>

Allocate a userspace buffer for older userspaces.  Also eliminate phys_mem
buffer.  The memset() in kvmctl really kills initial memory usage but swapping
works even with old userspaces.

A side effect is that maximum guest side is reduced for older userspace on
i386.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |2 -
 drivers/kvm/kvm_main.c |   83 +--
 2 files changed, 30 insertions(+), 55 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index e8a21e8..eb006ed 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -406,10 +406,8 @@ struct kvm_memory_slot {
gfn_t base_gfn;
unsigned long npages;
unsigned long flags;
-   struct page **phys_mem;
unsigned long *rmap;
unsigned long *dirty_bitmap;
-   int user_alloc; /* user allocated memory */
unsigned long userspace_addr;
 };
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 1c64047..3aec716 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -42,6 +42,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -300,36 +301,21 @@ static struct kvm *kvm_create_vm(void)
return kvm;
 }
 
-static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
-{
-   int i;
-
-   for (i = 0; i < free->npages; ++i)
-   if (free->phys_mem[i])
-   __free_page(free->phys_mem[i]);
-}
-
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
  struct kvm_memory_slot *dont)
 {
-   if (!dont || free->phys_mem != dont->phys_mem)
-   if (free->phys_mem) {
-   if (!free->user_alloc)
-   kvm_free_kernel_physmem(free);
-   vfree(free->phys_mem);
-   }
if (!dont || free->rmap != dont->rmap)
vfree(free->rmap);
 
if (!dont || free->dirty_bitmap != dont->dirty_bitmap)
vfree(free->dirty_bitmap);
 
-   free->phys_mem = NULL;
free->npages = 0;
free->dirty_bitmap = NULL;
+   free->rmap = NULL;
 }
 
 static void kvm_free_physmem(struct kvm *kvm)
@@ -712,10 +698,6 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
goto out_unlock;
}
 
-   /* Deallocate if slot is being removed */
-   if (!npages)
-   new.phys_mem = NULL;
-
/* Free page dirty bitmap if unneeded */
if (!(new.flags & KVM_MEM_LOG_DIRTY_PAGES))
new.dirty_bitmap = NULL;
@@ -723,29 +705,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
r = -ENOMEM;
 
/* Allocate if a slot is being created */
-   if (npages && !new.phys_mem) {
-   new.phys_mem = vmalloc(npages * sizeof(struct page *));
-
-   if (!new.phys_mem)
-   goto out_unlock;
-
+   if (npages && !new.rmap) {
new.rmap = vmalloc(npages * sizeof(struct page *));
 
if (!new.rmap)
goto out_unlock;
 
-   memset(new.phys_mem, 0, npages * sizeof(struct page *));
memset(new.rmap, 0, npages * sizeof(*new.rmap));
-   if (user_alloc) {
-   new.user_alloc = 1;
+
+   if (user_alloc)
new.userspace_addr = mem->userspace_addr;
-   } else {
-   for (i = 0; i < npages; ++i) {
-   new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-| __GFP_ZERO);
-   if (!new.phys_mem[i])
-   goto out_unlock;
-   }
+   else {
+   down_write(&current->mm->mmap_sem);
+   new.userspace_addr = do_mmap(NULL, 0,
+npages * PAGE_SIZE,
+PROT_READ | PROT_WRITE,
+MAP_SHARED | MAP_ANONYMOUS,
+0);
+   up_write(&current->mm->mmap_sem);
+
+   if (IS_ERR((void *)new.userspace_addr))
+   goto out_unlock;
}
}
 
@@ -1010,6 +990,8 @@ struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, 
gfn_t gfn)
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_memory_slot *slot;
+   struct page *page[1];
+   int npages;
 
gfn = unalias_gfn(kvm,

[kvm-devel] [PATCH 47/50] KVM: MMU: Make gfn_to_page() always safe

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

In case the page is not present in the guest memory map, return a dummy
page the guest can scribble on.

This simplifies error checking in its users.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |3 ++-
 drivers/kvm/kvm_main.c|   26 ++
 drivers/kvm/mmu.c |   16 +---
 drivers/kvm/paging_tmpl.h |7 ++-
 4 files changed, 23 insertions(+), 29 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 6ae7b63..0c17c76 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -565,8 +565,9 @@ static inline int is_error_hpa(hpa_t hpa) { return hpa >> 
HPA_MSB; }
 hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva);
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva);
 
-extern hpa_t bad_page_address;
+extern struct page *bad_page;
 
+int is_error_page(struct page *page);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index a1a3be9..ebfb967 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -993,6 +993,12 @@ static int kvm_vm_ioctl_set_irqchip(struct kvm *kvm, 
struct kvm_irqchip *chip)
return r;
 }
 
+int is_error_page(struct page *page)
+{
+   return page == bad_page;
+}
+EXPORT_SYMBOL_GPL(is_error_page);
+
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
 {
int i;
@@ -1034,7 +1040,7 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
gfn = unalias_gfn(kvm, gfn);
slot = __gfn_to_memslot(kvm, gfn);
if (!slot)
-   return NULL;
+   return bad_page;
return slot->phys_mem[gfn - slot->base_gfn];
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
@@ -1054,7 +1060,7 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void 
*data, int offset,
struct page *page;
 
page = gfn_to_page(kvm, gfn);
-   if (!page)
+   if (is_error_page(page))
return -EFAULT;
page_virt = kmap_atomic(page, KM_USER0);
 
@@ -1092,7 +1098,7 @@ int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, 
const void *data,
struct page *page;
 
page = gfn_to_page(kvm, gfn);
-   if (!page)
+   if (is_error_page(page))
return -EFAULT;
page_virt = kmap_atomic(page, KM_USER0);
 
@@ -1130,7 +1136,7 @@ int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int 
offset, int len)
struct page *page;
 
page = gfn_to_page(kvm, gfn);
-   if (!page)
+   if (is_error_page(page))
return -EFAULT;
page_virt = kmap_atomic(page, KM_USER0);
 
@@ -3068,7 +3074,7 @@ static struct page *kvm_vm_nopage(struct vm_area_struct 
*vma,
 
pgoff = ((address - vma->vm_start) >> PAGE_SHIFT) + vma->vm_pgoff;
page = gfn_to_page(kvm, pgoff);
-   if (!page)
+   if (is_error_page(page))
return NOPAGE_SIGBUS;
get_page(page);
if (type != NULL)
@@ -3383,7 +3389,7 @@ static struct sys_device kvm_sysdev = {
.cls = &kvm_sysdev_class,
 };
 
-hpa_t bad_page_address;
+struct page *bad_page;
 
 static inline
 struct kvm_vcpu *preempt_notifier_to_vcpu(struct preempt_notifier *pn)
@@ -3512,7 +3518,6 @@ EXPORT_SYMBOL_GPL(kvm_exit_x86);
 
 static __init int kvm_init(void)
 {
-   static struct page *bad_page;
int r;
 
r = kvm_mmu_module_init();
@@ -3523,16 +3528,13 @@ static __init int kvm_init(void)
 
kvm_arch_init();
 
-   bad_page = alloc_page(GFP_KERNEL);
+   bad_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 
if (bad_page == NULL) {
r = -ENOMEM;
goto out;
}
 
-   bad_page_address = page_to_pfn(bad_page) << PAGE_SHIFT;
-   memset(__va(bad_page_address), 0, PAGE_SIZE);
-
return 0;
 
 out:
@@ -3545,7 +3547,7 @@ out4:
 static __exit void kvm_exit(void)
 {
kvm_exit_debug();
-   __free_page(pfn_to_page(bad_page_address >> PAGE_SHIFT));
+   __free_page(bad_page);
kvm_mmu_module_exit();
 }
 
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index bbf5eb4..2ad14fb 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -850,23 +850,17 @@ static void page_header_update_slot(struct kvm *kvm, void 
*pte, gpa_t gpa)
__set_bit(slot, &page_head->slot_bitmap);
 }
 
-hpa_t safe_gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
-{
-   hpa_t hpa = gpa_to_hpa(kvm, gpa);
-
-   return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
-}
-
 hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
 {
struct page *page;
+   hpa_t hpa;
 
ASSERT((gpa & HPA_ERR_MASK) == 0);
page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
-   if (!page

[kvm-devel] [PATCH 48/50] KVM: MMU: Partial swapping of guest memory

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

This allows guest memory to be swapped.  Pages which are currently mapped
via shadow page tables are pinned into memory, but all other pages can
be freely swapped.

The patch makes gfn_to_page() elevate the page's reference count, and
introduces kvm_release_page() that pairs with it.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |2 +
 drivers/kvm/kvm_main.c|   83 +
 drivers/kvm/mmu.c |   14 +++-
 drivers/kvm/paging_tmpl.h |   26 --
 4 files changed, 84 insertions(+), 41 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0c17c76..df0711c 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -409,6 +409,7 @@ struct kvm_memory_slot {
unsigned long *rmap;
unsigned long *dirty_bitmap;
int user_alloc; /* user allocated memory */
+   unsigned long userspace_addr;
 };
 
 struct kvm {
@@ -570,6 +571,7 @@ extern struct page *bad_page;
 int is_error_page(struct page *page);
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+void kvm_release_page(struct page *page);
 int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
int len);
 int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ebfb967..1c64047 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -300,19 +300,6 @@ static struct kvm *kvm_create_vm(void)
return kvm;
 }
 
-static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
-{
-   int i;
-
-   for (i = 0; i < free->npages; ++i) {
-   if (free->phys_mem[i]) {
-   if (!PageReserved(free->phys_mem[i]))
-   SetPageDirty(free->phys_mem[i]);
-   page_cache_release(free->phys_mem[i]);
-   }
-   }
-}
-
 static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
 {
int i;
@@ -330,9 +317,7 @@ static void kvm_free_physmem_slot(struct kvm_memory_slot 
*free,
 {
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
-   if (free->user_alloc)
-   kvm_free_userspace_physmem(free);
-   else
+   if (!free->user_alloc)
kvm_free_kernel_physmem(free);
vfree(free->phys_mem);
}
@@ -361,7 +346,7 @@ static void free_pio_guest_pages(struct kvm_vcpu *vcpu)
 
for (i = 0; i < ARRAY_SIZE(vcpu->pio.guest_pages); ++i)
if (vcpu->pio.guest_pages[i]) {
-   __free_page(vcpu->pio.guest_pages[i]);
+   kvm_release_page(vcpu->pio.guest_pages[i]);
vcpu->pio.guest_pages[i] = NULL;
}
 }
@@ -752,19 +737,8 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
memset(new.phys_mem, 0, npages * sizeof(struct page *));
memset(new.rmap, 0, npages * sizeof(*new.rmap));
if (user_alloc) {
-   unsigned long pages_num;
-
new.user_alloc = 1;
-   down_read(&current->mm->mmap_sem);
-
-   pages_num = get_user_pages(current, current->mm,
-  mem->userspace_addr,
-  npages, 1, 0, new.phys_mem,
-  NULL);
-
-   up_read(&current->mm->mmap_sem);
-   if (pages_num != npages)
-   goto out_unlock;
+   new.userspace_addr = mem->userspace_addr;
} else {
for (i = 0; i < npages; ++i) {
new.phys_mem[i] = alloc_page(GFP_HIGHUSER
@@ -1039,12 +1013,39 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 
gfn = unalias_gfn(kvm, gfn);
slot = __gfn_to_memslot(kvm, gfn);
-   if (!slot)
+   if (!slot) {
+   get_page(bad_page);
return bad_page;
+   }
+   if (slot->user_alloc) {
+   struct page *page[1];
+   int npages;
+
+   down_read(&current->mm->mmap_sem);
+   npages = get_user_pages(current, current->mm,
+   slot->userspace_addr
+   + (gfn - slot->base_gfn) * PAGE_SIZE, 1,
+   1, 0, page, NULL);
+   up_read(&current->mm->mmap

[kvm-devel] [PATCH 45/50] KVM: MMU: Add rmap_next(), a helper for walking kvm rmaps

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |   45 +++--
 1 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index f52604a..14e54e3 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -456,28 +456,53 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
}
 }
 
-static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+static u64 *rmap_next(struct kvm *kvm, unsigned long *rmapp, u64 *spte)
 {
struct kvm_rmap_desc *desc;
+   struct kvm_rmap_desc *prev_desc;
+   u64 *prev_spte;
+   int i;
+
+   if (!*rmapp)
+   return NULL;
+   else if (!(*rmapp & 1)) {
+   if (!spte)
+   return (u64 *)*rmapp;
+   return NULL;
+   }
+   desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
+   prev_desc = NULL;
+   prev_spte = NULL;
+   while (desc) {
+   for (i = 0; i < RMAP_EXT && desc->shadow_ptes[i]; ++i) {
+   if (prev_spte == spte)
+   return desc->shadow_ptes[i];
+   prev_spte = desc->shadow_ptes[i];
+   }
+   desc = desc->more;
+   }
+   return NULL;
+}
+
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
+{
unsigned long *rmapp;
u64 *spte;
+   u64 *prev_spte;
 
gfn = unalias_gfn(kvm, gfn);
rmapp = gfn_to_rmap(kvm, gfn);
 
-   while (*rmapp) {
-   if (!(*rmapp & 1))
-   spte = (u64 *)*rmapp;
-   else {
-   desc = (struct kvm_rmap_desc *)(*rmapp & ~1ul);
-   spte = desc->shadow_ptes[0];
-   }
+   spte = rmap_next(kvm, rmapp, NULL);
+   while (spte) {
BUG_ON(!spte);
BUG_ON(!(*spte & PT_PRESENT_MASK));
BUG_ON(!(*spte & PT_WRITABLE_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-   rmap_remove(kvm, spte);
-   set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
+   prev_spte = spte;
+   spte = rmap_next(kvm, rmapp, spte);
+   rmap_remove(kvm, prev_spte);
+   set_shadow_pte(prev_spte, *prev_spte & ~PT_WRITABLE_MASK);
kvm_flush_remote_tlbs(kvm);
}
 }
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 43/50] KVM: MMU: Simplify page table walker

2007-12-23 Thread Avi Kivity
Simplify the walker level loop not to carry so much information from one
loop to the next.  In addition to being complex, this made kmap_atomic()
critical sections difficult to manage.

As a result of this change, kmap_atomic() sections are limited to actually
touching the guest pte, which allows the other functions called from the
walker to do sleepy operations.  This will happen when we enable swapping.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/paging_tmpl.h |  124 +---
 1 files changed, 48 insertions(+), 76 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index a9e687b..bab1b7f 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -59,32 +59,12 @@
 struct guest_walker {
int level;
gfn_t table_gfn[PT_MAX_FULL_LEVELS];
-   pt_element_t *table;
pt_element_t pte;
-   pt_element_t *ptep;
-   struct page *page;
-   int index;
pt_element_t inherited_ar;
gfn_t gfn;
u32 error_code;
 };
 
-static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
-   int write_fault,
-   pt_element_t *ptep,
-   gfn_t table_gfn)
-{
-   gpa_t pte_gpa;
-
-   if (write_fault && !is_dirty_pte(*ptep)) {
-   mark_page_dirty(vcpu->kvm, table_gfn);
-   *ptep |= PT_DIRTY_MASK;
-   pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
-   pte_gpa += offset_in_page(ptep);
-   kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
-   }
-}
-
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -94,105 +74,99 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
 {
hpa_t hpa;
struct kvm_memory_slot *slot;
-   pt_element_t *ptep;
-   pt_element_t root;
+   struct page *page;
+   pt_element_t *table;
+   pt_element_t pte;
gfn_t table_gfn;
+   unsigned index;
+   gpa_t pte_gpa;
 
pgprintk("%s: addr %lx\n", __FUNCTION__, addr);
walker->level = vcpu->mmu.root_level;
-   walker->table = NULL;
-   walker->page = NULL;
-   walker->ptep = NULL;
-   root = vcpu->cr3;
+   pte = vcpu->cr3;
 #if PTTYPE == 64
if (!is_long_mode(vcpu)) {
-   walker->ptep = &vcpu->pdptrs[(addr >> 30) & 3];
-   root = *walker->ptep;
-   walker->pte = root;
-   if (!(root & PT_PRESENT_MASK))
+   pte = vcpu->pdptrs[(addr >> 30) & 3];
+   if (!is_present_pte(pte))
goto not_present;
--walker->level;
}
 #endif
-   table_gfn = (root & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
-   walker->table_gfn[walker->level - 1] = table_gfn;
-   pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
-walker->level - 1, table_gfn);
-   slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-   hpa = safe_gpa_to_hpa(vcpu->kvm, root & PT64_BASE_ADDR_MASK);
-   walker->page = pfn_to_page(hpa >> PAGE_SHIFT);
-   walker->table = kmap_atomic(walker->page, KM_USER0);
-
ASSERT((!is_long_mode(vcpu) && is_pae(vcpu)) ||
   (vcpu->cr3 & CR3_NONPAE_RESERVED_BITS) == 0);
 
walker->inherited_ar = PT_USER_MASK | PT_WRITABLE_MASK;
 
for (;;) {
-   int index = PT_INDEX(addr, walker->level);
-   hpa_t paddr;
+   index = PT_INDEX(addr, walker->level);
 
-   ptep = &walker->table[index];
-   walker->index = index;
-   ASSERT(((unsigned long)walker->table & PAGE_MASK) ==
-  ((unsigned long)ptep & PAGE_MASK));
+   table_gfn = (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT;
+   walker->table_gfn[walker->level - 1] = table_gfn;
+   pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
+walker->level - 1, table_gfn);
+
+   slot = gfn_to_memslot(vcpu->kvm, table_gfn);
+   hpa = safe_gpa_to_hpa(vcpu->kvm, pte & PT64_BASE_ADDR_MASK);
+   page = pfn_to_page(hpa >> PAGE_SHIFT);
 
-   if (!is_present_pte(*ptep))
+   table = kmap_atomic(page, KM_USER0);
+   pte = table[index];
+   kunmap_atomic(table, KM_USER0);
+
+   if (!is_present_pte(pte))
goto not_present;
 
-   if (write_fault && !is_writeble_pte(*ptep))
+   if (write_fault && !is_writeble_pte(pte))
if (user_fault || is_write_protection(vcpu))
goto access

[kvm-devel] [PATCH 49/50] KVM: Use virtual cpu accounting if available for guest times.

2007-12-23 Thread Avi Kivity
From: Christian Borntraeger <[EMAIL PROTECTED]>

ppc and s390 offer the possibility to track process times precisely
by looking at cpu timer on every context switch, irq, softirq etc.
We can use that infrastructure as well for guest time accounting.
We need to account the used time before we change the state.
This patch adds a call to account_system_vtime to kvm_guest_enter
and kvm_guest exit. If CONFIG_VIRT_CPU_ACCOUNTING is not set,
account_system_vtime is defined in hardirq.h as an empty function,
which means this patch does not change the behaviour on other
platforms.

I compile tested this patch on x86 and function tested the patch on
s390.

Signed-off-by: Christian Borntraeger <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index df0711c..e8a21e8 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -7,6 +7,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -671,11 +672,13 @@ __init void kvm_arch_init(void);
 
 static inline void kvm_guest_enter(void)
 {
+   account_system_vtime(current);
current->flags |= PF_VCPU;
 }
 
 static inline void kvm_guest_exit(void)
 {
+   account_system_vtime(current);
current->flags &= ~PF_VCPU;
 }
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 46/50] KVM: MMU: Keep a reverse mapping of non-writable translations

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

The current kvm mmu only reverse maps writable translation.  This is used
to write-protect a page in case it becomes a pagetable.

But with swapping support, we need a reverse mapping of read-only pages as
well:  when we evict a page, we need to remove any mapping to it, whether
writable or not.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |   23 +++
 1 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 14e54e3..bbf5eb4 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -211,8 +211,8 @@ static int is_io_pte(unsigned long pte)
 
 static int is_rmap_pte(u64 pte)
 {
-   return (pte & (PT_WRITABLE_MASK | PT_PRESENT_MASK))
-   == (PT_WRITABLE_MASK | PT_PRESENT_MASK);
+   return pte != shadow_trap_nonpresent_pte
+   && pte != shadow_notrap_nonpresent_pte;
 }
 
 static void set_shadow_pte(u64 *sptep, u64 spte)
@@ -488,7 +488,6 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
unsigned long *rmapp;
u64 *spte;
-   u64 *prev_spte;
 
gfn = unalias_gfn(kvm, gfn);
rmapp = gfn_to_rmap(kvm, gfn);
@@ -497,13 +496,11 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn)
while (spte) {
BUG_ON(!spte);
BUG_ON(!(*spte & PT_PRESENT_MASK));
-   BUG_ON(!(*spte & PT_WRITABLE_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-   prev_spte = spte;
-   spte = rmap_next(kvm, rmapp, spte);
-   rmap_remove(kvm, prev_spte);
-   set_shadow_pte(prev_spte, *prev_spte & ~PT_WRITABLE_MASK);
+   if (is_writeble_pte(*spte))
+   set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
kvm_flush_remote_tlbs(kvm);
+   spte = rmap_next(kvm, rmapp, spte);
}
 }
 
@@ -908,14 +905,18 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, 
hpa_t p)
table = __va(table_addr);
 
if (level == 1) {
+   int was_rmapped;
+
pte = table[index];
+   was_rmapped = is_rmap_pte(pte);
if (is_shadow_present_pte(pte) && is_writeble_pte(pte))
return 0;
mark_page_dirty(vcpu->kvm, v >> PAGE_SHIFT);
page_header_update_slot(vcpu->kvm, table, v);
table[index] = p | PT_PRESENT_MASK | PT_WRITABLE_MASK |
PT_USER_MASK;
-   rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
+   if (!was_rmapped)
+   rmap_add(vcpu, &table[index], v >> PAGE_SHIFT);
return 0;
}
 
@@ -1424,10 +1425,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, 
int slot)
pt = page->spt;
for (i = 0; i < PT64_ENT_PER_PAGE; ++i)
/* avoid RMW */
-   if (pt[i] & PT_WRITABLE_MASK) {
-   rmap_remove(kvm, &pt[i]);
+   if (pt[i] & PT_WRITABLE_MASK)
pt[i] &= ~PT_WRITABLE_MASK;
-   }
}
 }
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 44/50] KVM: x86 emulator: cmc, clc, cli, sti

2007-12-23 Thread Avi Kivity
From: Nitin A Kamble <[EMAIL PROTECTED]>

Instruction: cmc, clc, cli, sti
opcodes: 0xf5, 0xf8, 0xfa, 0xfb respectively.

[avi: fix reference to EFLG_IF which is not defined anywhere]

Signed-off-by: Nitin A Kamble <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   21 +++--
 1 files changed, 19 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 72621c9..af98ea1 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -161,10 +161,10 @@ static u8 opcode_table[256] = {
ImplicitOps, SrcImm|ImplicitOps, 0, SrcImmByte|ImplicitOps, 0, 0, 0, 0,
/* 0xF0 - 0xF7 */
0, 0, 0, 0,
-   ImplicitOps, 0,
+   ImplicitOps, ImplicitOps,
ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM,
/* 0xF8 - 0xFF */
-   0, 0, 0, 0,
+   ImplicitOps, 0, ImplicitOps, ImplicitOps,
0, 0, ByteOp | DstMem | SrcNone | ModRM, DstMem | SrcNone | ModRM
 };
 
@@ -1476,6 +1476,23 @@ special_insn:
case 0xf4:  /* hlt */
ctxt->vcpu->halt_request = 1;
goto done;
+   case 0xf5:  /* cmc */
+   /* complement carry flag from eflags reg */
+   ctxt->eflags ^= EFLG_CF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
+   case 0xf8: /* clc */
+   ctxt->eflags &= ~EFLG_CF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
+   case 0xfa: /* cli */
+   ctxt->eflags &= ~X86_EFLAGS_IF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
+   case 0xfb: /* sti */
+   ctxt->eflags |= X86_EFLAGS_IF;
+   c->dst.type = OP_NONE;  /* Disable writeback. */
+   break;
}
if (c->rep_prefix) {
if (c->regs[VCPU_REGS_RCX] == 0) {
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 27/50] KVM: Support assigning userspace memory to the guest

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

Instead of having the kernel allocate memory to the guest, let userspace
allocate it and pass the address to the kernel.

This is required for s390 support, but also enables features like memory
sharing and using hugetlbfs backed memory.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |1 +
 drivers/kvm/kvm_main.c |   81 +--
 include/linux/kvm.h|   12 +++
 3 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index ec5b498..3eaed4d 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -408,6 +408,7 @@ struct kvm_memory_slot {
struct page **phys_mem;
unsigned long *rmap;
unsigned long *dirty_bitmap;
+   int user_alloc; /* user allocated memory */
 };
 
 struct kvm {
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 47ffefb..9633fd3 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -40,6 +40,7 @@
 #include 
 #include 
 #include 
+#include 
 
 #include 
 #include 
@@ -300,19 +301,40 @@ static struct kvm *kvm_create_vm(void)
return kvm;
 }
 
+static void kvm_free_userspace_physmem(struct kvm_memory_slot *free)
+{
+   int i;
+
+   for (i = 0; i < free->npages; ++i) {
+   if (free->phys_mem[i]) {
+   if (!PageReserved(free->phys_mem[i]))
+   SetPageDirty(free->phys_mem[i]);
+   page_cache_release(free->phys_mem[i]);
+   }
+   }
+}
+
+static void kvm_free_kernel_physmem(struct kvm_memory_slot *free)
+{
+   int i;
+
+   for (i = 0; i < free->npages; ++i)
+   if (free->phys_mem[i])
+   __free_page(free->phys_mem[i]);
+}
+
 /*
  * Free any memory in @free but not in @dont.
  */
 static void kvm_free_physmem_slot(struct kvm_memory_slot *free,
  struct kvm_memory_slot *dont)
 {
-   int i;
-
if (!dont || free->phys_mem != dont->phys_mem)
if (free->phys_mem) {
-   for (i = 0; i < free->npages; ++i)
-   if (free->phys_mem[i])
-   __free_page(free->phys_mem[i]);
+   if (free->user_alloc)
+   kvm_free_userspace_physmem(free);
+   else
+   kvm_free_kernel_physmem(free);
vfree(free->phys_mem);
}
if (!dont || free->rmap != dont->rmap)
@@ -652,7 +674,9 @@ EXPORT_SYMBOL_GPL(fx_init);
  * Discontiguous memory is allowed, mostly for framebuffers.
  */
 static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
- struct kvm_memory_region *mem)
+ struct
+ kvm_userspace_memory_region *mem,
+ int user_alloc)
 {
int r;
gfn_t base_gfn;
@@ -728,11 +752,27 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
 
memset(new.phys_mem, 0, npages * sizeof(struct page *));
memset(new.rmap, 0, npages * sizeof(*new.rmap));
-   for (i = 0; i < npages; ++i) {
-   new.phys_mem[i] = alloc_page(GFP_HIGHUSER
-| __GFP_ZERO);
-   if (!new.phys_mem[i])
+   if (user_alloc) {
+   unsigned long pages_num;
+
+   new.user_alloc = 1;
+   down_read(&current->mm->mmap_sem);
+
+   pages_num = get_user_pages(current, current->mm,
+  mem->userspace_addr,
+  npages, 1, 0, new.phys_mem,
+  NULL);
+
+   up_read(&current->mm->mmap_sem);
+   if (pages_num != npages)
goto out_unlock;
+   } else {
+   for (i = 0; i < npages; ++i) {
+   new.phys_mem[i] = alloc_page(GFP_HIGHUSER
+| __GFP_ZERO);
+   if (!new.phys_mem[i])
+   goto out_unlock;
+   }
}
}
 
@@ -3108,11 +3148,29 @@ static long kvm_vm_ioctl(struct file *filp,
break;
case KVM_SET_MEMORY_REGION: {
struct kvm_memory_region kvm_mem;
+   struct kvm_userspace_memory_region kvm_userspace_mem;
 
r = -EFA

[kvm-devel] [PATCH 30/50] KVM: MMU: More struct kvm_vcpu -> struct kvm cleanups

2007-12-23 Thread Avi Kivity
From: Anthony Liguori <[EMAIL PROTECTED]>

This time, the biggest change is gpa_to_hpa. The translation of GPA to HPA does
not depend on the VCPU state unlike GVA to GPA so there's no need to pass in
the kvm_vcpu.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |2 +-
 drivers/kvm/mmu.c |   26 +-
 drivers/kvm/paging_tmpl.h |6 +++---
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 9c9c1d7..d56962d 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -554,7 +554,7 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int 
slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
+hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
 #define HPA_ERR_MASK ((hpa_t)1 << HPA_MSB)
 static inline int is_error_hpa(hpa_t hpa) { return hpa >> HPA_MSB; }
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index a5ca945..d046ba8 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -451,14 +451,14 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
}
 }
 
-static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
+static void rmap_write_protect(struct kvm *kvm, u64 gfn)
 {
struct kvm_rmap_desc *desc;
unsigned long *rmapp;
u64 *spte;
 
-   gfn = unalias_gfn(vcpu->kvm, gfn);
-   rmapp = gfn_to_rmap(vcpu->kvm, gfn);
+   gfn = unalias_gfn(kvm, gfn);
+   rmapp = gfn_to_rmap(kvm, gfn);
 
while (*rmapp) {
if (!(*rmapp & 1))
@@ -471,9 +471,9 @@ static void rmap_write_protect(struct kvm_vcpu *vcpu, u64 
gfn)
BUG_ON(!(*spte & PT_PRESENT_MASK));
BUG_ON(!(*spte & PT_WRITABLE_MASK));
rmap_printk("rmap_write_protect: spte %p %llx\n", spte, *spte);
-   rmap_remove(vcpu->kvm, spte);
+   rmap_remove(kvm, spte);
set_shadow_pte(spte, *spte & ~PT_WRITABLE_MASK);
-   kvm_flush_remote_tlbs(vcpu->kvm);
+   kvm_flush_remote_tlbs(kvm);
}
 }
 
@@ -670,7 +670,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
hlist_add_head(&page->hash_link, bucket);
vcpu->mmu.prefetch_page(vcpu, page);
if (!metaphysical)
-   rmap_write_protect(vcpu, gfn);
+   rmap_write_protect(vcpu->kvm, gfn);
return page;
 }
 
@@ -823,19 +823,19 @@ static void page_header_update_slot(struct kvm *kvm, void 
*pte, gpa_t gpa)
__set_bit(slot, &page_head->slot_bitmap);
 }
 
-hpa_t safe_gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+hpa_t safe_gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
 {
-   hpa_t hpa = gpa_to_hpa(vcpu, gpa);
+   hpa_t hpa = gpa_to_hpa(kvm, gpa);
 
return is_error_hpa(hpa) ? bad_page_address | (gpa & ~PAGE_MASK): hpa;
 }
 
-hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa)
+hpa_t gpa_to_hpa(struct kvm *kvm, gpa_t gpa)
 {
struct page *page;
 
ASSERT((gpa & HPA_ERR_MASK) == 0);
-   page = gfn_to_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+   page = gfn_to_page(kvm, gpa >> PAGE_SHIFT);
if (!page)
return gpa | HPA_ERR_MASK;
return ((hpa_t)page_to_pfn(page) << PAGE_SHIFT)
@@ -848,7 +848,7 @@ hpa_t gva_to_hpa(struct kvm_vcpu *vcpu, gva_t gva)
 
if (gpa == UNMAPPED_GVA)
return UNMAPPED_GVA;
-   return gpa_to_hpa(vcpu, gpa);
+   return gpa_to_hpa(vcpu->kvm, gpa);
 }
 
 struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
@@ -857,7 +857,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
 
if (gpa == UNMAPPED_GVA)
return NULL;
-   return pfn_to_page(gpa_to_hpa(vcpu, gpa) >> PAGE_SHIFT);
+   return pfn_to_page(gpa_to_hpa(vcpu->kvm, gpa) >> PAGE_SHIFT);
 }
 
 static void nonpaging_new_cr3(struct kvm_vcpu *vcpu)
@@ -1012,7 +1012,7 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, 
gva_t gva,
ASSERT(VALID_PAGE(vcpu->mmu.root_hpa));
 
 
-   paddr = gpa_to_hpa(vcpu , addr & PT64_BASE_ADDR_MASK);
+   paddr = gpa_to_hpa(vcpu->kvm, addr & PT64_BASE_ADDR_MASK);
 
if (is_error_hpa(paddr))
return 1;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 4f6edf8..8e1e4ca 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -103,7 +103,7 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
pgprintk("%s: table_gfn[%d] %lx\n", __FUNCTION__,
 walker->level - 1, table_gfn);
slot = gfn_to_memslot(vcpu->kvm, table_gfn);
-  

[kvm-devel] [PATCH 28/50] KVM: Move x86 msr handling to new files x86.[ch]

2007-12-23 Thread Avi Kivity
From: Carsten Otte <[EMAIL PROTECTED]>

Signed-off-by: Carsten Otte <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/Makefile   |2 +-
 drivers/kvm/kvm.h  |4 ++
 drivers/kvm/kvm_main.c |   69 +---
 drivers/kvm/x86.c  |  102 
 drivers/kvm/x86.h  |   16 +++
 5 files changed, 126 insertions(+), 67 deletions(-)
 create mode 100644 drivers/kvm/x86.c
 create mode 100644 drivers/kvm/x86.h

diff --git a/drivers/kvm/Makefile b/drivers/kvm/Makefile
index e5a8f4d..cf18ad4 100644
--- a/drivers/kvm/Makefile
+++ b/drivers/kvm/Makefile
@@ -2,7 +2,7 @@
 # Makefile for Kernel-based Virtual Machine module
 #
 
-kvm-objs := kvm_main.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
+kvm-objs := kvm_main.o x86.o mmu.o x86_emulate.o i8259.o irq.o lapic.o ioapic.o
 obj-$(CONFIG_KVM) += kvm.o
 kvm-intel-objs = vmx.o
 obj-$(CONFIG_KVM_INTEL) += kvm-intel.o
diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 3eaed4d..9c9c1d7 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -653,6 +653,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
 
 int kvm_fix_hypercall(struct kvm_vcpu *vcpu);
 
+long kvm_arch_dev_ioctl(struct file *filp,
+   unsigned int ioctl, unsigned long arg);
+__init void kvm_arch_init(void);
+
 static inline void kvm_guest_enter(void)
 {
current->flags |= PF_VCPU;
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 9633fd3..9f7370f 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -16,6 +16,7 @@
  */
 
 #include "kvm.h"
+#include "x86.h"
 #include "x86_emulate.h"
 #include "segment_descriptor.h"
 #include "irq.h"
@@ -2508,43 +2509,6 @@ void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int 
*db, int *l)
 EXPORT_SYMBOL_GPL(kvm_get_cs_db_l_bits);
 
 /*
- * List of msr numbers which we expose to userspace through KVM_GET_MSRS
- * and KVM_SET_MSRS, and KVM_GET_MSR_INDEX_LIST.
- *
- * This list is modified at module load time to reflect the
- * capabilities of the host cpu.
- */
-static u32 msrs_to_save[] = {
-   MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
-   MSR_K6_STAR,
-#ifdef CONFIG_X86_64
-   MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR,
-#endif
-   MSR_IA32_TIME_STAMP_COUNTER,
-};
-
-static unsigned num_msrs_to_save;
-
-static u32 emulated_msrs[] = {
-   MSR_IA32_MISC_ENABLE,
-};
-
-static __init void kvm_init_msr_list(void)
-{
-   u32 dummy[2];
-   unsigned i, j;
-
-   for (i = j = 0; i < ARRAY_SIZE(msrs_to_save); i++) {
-   if (rdmsr_safe(msrs_to_save[i], &dummy[0], &dummy[1]) < 0)
-   continue;
-   if (j < i)
-   msrs_to_save[j] = msrs_to_save[i];
-   j++;
-   }
-   num_msrs_to_save = j;
-}
-
-/*
  * Adapt set_msr() to msr_io()'s calling convention
  */
 static int do_set_msr(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
@@ -3356,33 +3320,6 @@ static long kvm_dev_ioctl(struct file *filp,
goto out;
r = kvm_dev_ioctl_create_vm();
break;
-   case KVM_GET_MSR_INDEX_LIST: {
-   struct kvm_msr_list __user *user_msr_list = argp;
-   struct kvm_msr_list msr_list;
-   unsigned n;
-
-   r = -EFAULT;
-   if (copy_from_user(&msr_list, user_msr_list, sizeof msr_list))
-   goto out;
-   n = msr_list.nmsrs;
-   msr_list.nmsrs = num_msrs_to_save + ARRAY_SIZE(emulated_msrs);
-   if (copy_to_user(user_msr_list, &msr_list, sizeof msr_list))
-   goto out;
-   r = -E2BIG;
-   if (n < num_msrs_to_save)
-   goto out;
-   r = -EFAULT;
-   if (copy_to_user(user_msr_list->indices, &msrs_to_save,
-num_msrs_to_save * sizeof(u32)))
-   goto out;
-   if (copy_to_user(user_msr_list->indices
-+ num_msrs_to_save * sizeof(u32),
-&emulated_msrs,
-ARRAY_SIZE(emulated_msrs) * sizeof(u32)))
-   goto out;
-   r = 0;
-   break;
-   }
case KVM_CHECK_EXTENSION: {
int ext = (long)argp;
 
@@ -3406,7 +3343,7 @@ static long kvm_dev_ioctl(struct file *filp,
r = 2 * PAGE_SIZE;
break;
default:
-   ;
+   return kvm_arch_dev_ioctl(filp, ioctl, arg);
}
 out:
return r;
@@ -3770,7 +3707,7 @@ static __init int kvm_init(void)
 
kvm_init_debug();
 
-   kvm_init_msr_list();
+   kvm_arch_init();
 

[kvm-devel] [PATCH 22/50] KVM: Allow dynamic allocation of the mmu shadow cache size

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED]>

The user is now able to set how many mmu pages will be allocated to the guest.

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |7 ++-
 drivers/kvm/kvm_main.c |   47 +++
 drivers/kvm/mmu.c  |   40 ++--
 include/linux/kvm.h|3 +++
 4 files changed, 94 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1965438..9f10c37 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -40,6 +40,8 @@
 #define KVM_MAX_VCPUS 4
 #define KVM_ALIAS_SLOTS 4
 #define KVM_MEMORY_SLOTS 8
+#define KVM_PERMILLE_MMU_PAGES 20
+#define KVM_MIN_ALLOC_MMU_PAGES 64
 #define KVM_NUM_MMU_PAGES 1024
 #define KVM_MIN_FREE_MMU_PAGES 5
 #define KVM_REFILL_PAGES 25
@@ -418,7 +420,9 @@ struct kvm {
 * Hash table of struct kvm_mmu_page.
 */
struct list_head active_mmu_pages;
-   int n_free_mmu_pages;
+   unsigned int n_free_mmu_pages;
+   unsigned int n_requested_mmu_pages;
+   unsigned int n_alloc_mmu_pages;
struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
unsigned long rmap_overflow;
@@ -547,6 +551,7 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 
notrap_pte);
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
 void kvm_mmu_zap_all(struct kvm *kvm);
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages);
 
 hpa_t gpa_to_hpa(struct kvm_vcpu *vcpu, gpa_t gpa);
 #define HPA_MSB ((sizeof(hpa_t) * 8) - 1)
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 3d1972e..d220e63 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -743,6 +743,24 @@ static int kvm_vm_ioctl_set_memory_region(struct kvm *kvm,
if (mem->slot >= kvm->nmemslots)
kvm->nmemslots = mem->slot + 1;
 
+   if (!kvm->n_requested_mmu_pages) {
+   unsigned int n_pages;
+
+   if (npages) {
+   n_pages = npages * KVM_PERMILLE_MMU_PAGES / 1000;
+   kvm_mmu_change_mmu_pages(kvm, kvm->n_alloc_mmu_pages +
+n_pages);
+   } else {
+   unsigned int nr_mmu_pages;
+
+   n_pages = old.npages * KVM_PERMILLE_MMU_PAGES / 1000;
+   nr_mmu_pages = kvm->n_alloc_mmu_pages - n_pages;
+   nr_mmu_pages = max(nr_mmu_pages,
+   (unsigned int) KVM_MIN_ALLOC_MMU_PAGES);
+   kvm_mmu_change_mmu_pages(kvm, nr_mmu_pages);
+   }
+   }
+
*memslot = new;
 
kvm_mmu_slot_remove_write_access(kvm, mem->slot);
@@ -760,6 +778,26 @@ out:
return r;
 }
 
+static int kvm_vm_ioctl_set_nr_mmu_pages(struct kvm *kvm,
+ u32 kvm_nr_mmu_pages)
+{
+   if (kvm_nr_mmu_pages < KVM_MIN_ALLOC_MMU_PAGES)
+   return -EINVAL;
+
+   mutex_lock(&kvm->lock);
+
+   kvm_mmu_change_mmu_pages(kvm, kvm_nr_mmu_pages);
+   kvm->n_requested_mmu_pages = kvm_nr_mmu_pages;
+
+   mutex_unlock(&kvm->lock);
+   return 0;
+}
+
+static int kvm_vm_ioctl_get_nr_mmu_pages(struct kvm *kvm)
+{
+   return kvm->n_alloc_mmu_pages;
+}
+
 /*
  * Get (and clear) the dirty memory log for a memory slot.
  */
@@ -3071,6 +3109,14 @@ static long kvm_vm_ioctl(struct file *filp,
goto out;
break;
}
+   case KVM_SET_NR_MMU_PAGES:
+   r = kvm_vm_ioctl_set_nr_mmu_pages(kvm, arg);
+   if (r)
+   goto out;
+   break;
+   case KVM_GET_NR_MMU_PAGES:
+   r = kvm_vm_ioctl_get_nr_mmu_pages(kvm);
+   break;
case KVM_GET_DIRTY_LOG: {
struct kvm_dirty_log log;
 
@@ -3278,6 +3324,7 @@ static long kvm_dev_ioctl(struct file *filp,
switch (ext) {
case KVM_CAP_IRQCHIP:
case KVM_CAP_HLT:
+   case KVM_CAP_MMU_SHADOW_CACHE_CONTROL:
r = 1;
break;
default:
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 72757db..6cda1fe 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -747,6 +747,40 @@ static void kvm_mmu_zap_page(struct kvm *kvm,
kvm_mmu_reset_last_pte_updated(kvm);
 }
 
+/*
+ * Changing the number of mmu pages allocated to the vm
+ * Note: if kvm_nr_mmu_pages is too small, you will get dead lock
+ */
+void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages)
+{
+   /*
+* If we set the number of mmu pages to be smaller be th

[kvm-devel] [PATCH 39/50] KVM: Add some \n in ioapic_debug()

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Add new-line at end of debug strings.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/ioapic.c |   25 ++---
 1 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/drivers/kvm/ioapic.c b/drivers/kvm/ioapic.c
index c7992e6..8503d99 100644
--- a/drivers/kvm/ioapic.c
+++ b/drivers/kvm/ioapic.c
@@ -40,8 +40,11 @@
 #include 
 #include 
 #include "irq.h"
-/* #define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
+#if 0
+#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
+#else
 #define ioapic_debug(fmt, arg...)
+#endif
 static void ioapic_deliver(struct kvm_ioapic *vioapic, int irq);
 
 static unsigned long ioapic_read_indirect(struct kvm_ioapic *ioapic,
@@ -113,7 +116,7 @@ static void ioapic_write_indirect(struct kvm_ioapic 
*ioapic, u32 val)
default:
index = (ioapic->ioregsel - 0x10) >> 1;
 
-   ioapic_debug("change redir index %x val %x", index, val);
+   ioapic_debug("change redir index %x val %x\n", index, val);
if (index >= IOAPIC_NUM_PINS)
return;
if (ioapic->ioregsel & 1) {
@@ -134,7 +137,7 @@ static void ioapic_inj_irq(struct kvm_ioapic *ioapic,
   struct kvm_lapic *target,
   u8 vector, u8 trig_mode, u8 delivery_mode)
 {
-   ioapic_debug("irq %d trig %d deliv %d", vector, trig_mode,
+   ioapic_debug("irq %d trig %d deliv %d\n", vector, trig_mode,
 delivery_mode);
 
ASSERT((delivery_mode == dest_Fixed) ||
@@ -151,7 +154,7 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
struct kvm *kvm = ioapic->kvm;
struct kvm_vcpu *vcpu;
 
-   ioapic_debug("dest %d dest_mode %d", dest, dest_mode);
+   ioapic_debug("dest %d dest_mode %d\n", dest, dest_mode);
 
if (dest_mode == 0) {   /* Physical mode. */
if (dest == 0xFF) { /* Broadcast. */
@@ -179,7 +182,7 @@ static u32 ioapic_get_delivery_bitmask(struct kvm_ioapic 
*ioapic, u8 dest,
kvm_apic_match_logical_addr(vcpu->apic, dest))
mask |= 1 << vcpu->vcpu_id;
}
-   ioapic_debug("mask %x", mask);
+   ioapic_debug("mask %x\n", mask);
return mask;
 }
 
@@ -196,12 +199,12 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
int vcpu_id;
 
ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-"vector=%x trig_mode=%x",
+"vector=%x trig_mode=%x\n",
 dest, dest_mode, delivery_mode, vector, trig_mode);
 
deliver_bitmask = ioapic_get_delivery_bitmask(ioapic, dest, dest_mode);
if (!deliver_bitmask) {
-   ioapic_debug("no target on destination");
+   ioapic_debug("no target on destination\n");
return;
}
 
@@ -214,7 +217,7 @@ static void ioapic_deliver(struct kvm_ioapic *ioapic, int 
irq)
   trig_mode, delivery_mode);
else
ioapic_debug("null round robin: "
-"mask=%x vector=%x delivery_mode=%x",
+"mask=%x vector=%x delivery_mode=%x\n",
 deliver_bitmask, vector, dest_LowestPrio);
break;
case dest_Fixed:
@@ -304,7 +307,7 @@ static void ioapic_mmio_read(struct kvm_io_device *this, 
gpa_t addr, int len,
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 result;
 
-   ioapic_debug("addr %lx", (unsigned long)addr);
+   ioapic_debug("addr %lx\n", (unsigned long)addr);
ASSERT(!(addr & 0xf));  /* check alignment */
 
addr &= 0xff;
@@ -341,8 +344,8 @@ static void ioapic_mmio_write(struct kvm_io_device *this, 
gpa_t addr, int len,
struct kvm_ioapic *ioapic = (struct kvm_ioapic *)this->private;
u32 data;
 
-   ioapic_debug("ioapic_mmio_write addr=%lx len=%d val=%p\n",
-addr, len, val);
+   ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
+(void*)addr, len, val);
ASSERT(!(addr & 0xf));  /* check alignment */
if (len == 4 || len == 8)
data = *(u32 *) val;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 42/50] KVM: x86 emulator: Implement emulation of instruction: inc & dec

2007-12-23 Thread Avi Kivity
From: Nitin A Kamble <[EMAIL PROTECTED]>

Instructions:
inc r16/r32 (opcode 0x40-0x47)
dec r16/r32 (opcode 0x48-0x4f)

Signed-off-by: Nitin A Kamble <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   20 ++--
 1 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index b03029e..72621c9 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -96,8 +96,12 @@ static u8 opcode_table[256] = {
ByteOp | DstMem | SrcReg | ModRM, DstMem | SrcReg | ModRM,
ByteOp | DstReg | SrcMem | ModRM, DstReg | SrcMem | ModRM,
0, 0, 0, 0,
-   /* 0x40 - 0x4F */
-   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+   /* 0x40 - 0x47 */
+   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   /* 0x48 - 0x4F */
+   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
+   ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
/* 0x50 - 0x57 */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
@@ -1376,6 +1380,18 @@ special_insn:
if (c->twobyte)
goto twobyte_special_insn;
switch (c->b) {
+   case 0x40 ... 0x47: /* inc r16/r32 */
+   c->dst.bytes = c->op_bytes;
+   c->dst.ptr = (unsigned long *)&c->regs[c->b & 0x7];
+   c->dst.val = *c->dst.ptr;
+   emulate_1op("inc", c->dst, ctxt->eflags);
+   break;
+   case 0x48 ... 0x4f: /* dec r16/r32 */
+   c->dst.bytes = c->op_bytes;
+   c->dst.ptr = (unsigned long *)&c->regs[c->b & 0x7];
+   c->dst.val = *c->dst.ptr;
+   emulate_1op("dec", c->dst, ctxt->eflags);
+   break;
case 0x50 ... 0x57:  /* push reg */
if (c->op_bytes == 2)
c->src.val = (u16) c->regs[c->b & 0x7];
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 41/50] KVM: Rename KVM_TLB_FLUSH to KVM_REQ_TLB_FLUSH

2007-12-23 Thread Avi Kivity
We now have a new namespace, KVM_REQ_*, for bits in vcpu->requests.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |2 +-
 drivers/kvm/kvm_main.c |4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 1edf8a5..6ae7b63 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -67,7 +67,7 @@
 /*
  * vcpu->requests bit members
  */
-#define KVM_TLB_FLUSH 0
+#define KVM_REQ_TLB_FLUSH  0
 
 /*
  * Address types:
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index c94d4df..a1a3be9 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -212,7 +212,7 @@ void kvm_flush_remote_tlbs(struct kvm *kvm)
vcpu = kvm->vcpus[i];
if (!vcpu)
continue;
-   if (test_and_set_bit(KVM_TLB_FLUSH, &vcpu->requests))
+   if (test_and_set_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
continue;
cpu = vcpu->cpu;
if (cpu != -1 && cpu != raw_smp_processor_id())
@@ -2171,7 +2171,7 @@ again:
kvm_guest_enter();
 
if (vcpu->requests)
-   if (test_and_clear_bit(KVM_TLB_FLUSH, &vcpu->requests))
+   if (test_and_clear_bit(KVM_REQ_TLB_FLUSH, &vcpu->requests))
kvm_x86_ops->tlb_flush(vcpu);
 
kvm_x86_ops->run(vcpu, kvm_run);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 34/50] KVM: MMU: Instantiate real-mode shadows as user writable shadows

2007-12-23 Thread Avi Kivity
This is consistent with real-mode permissions.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index e6616a6..f52604a 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -902,7 +902,7 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, 
hpa_t p)
>> PAGE_SHIFT;
new_table = kvm_mmu_get_page(vcpu, pseudo_gfn,
 v, level - 1,
-1, 0, &table[index]);
+1, 3, &table[index]);
if (!new_table) {
pgprintk("nonpaging_map: ENOMEM\n");
return -ENOMEM;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 32/50] KVM: MMU: Fix nx access bit for huge pages

2007-12-23 Thread Avi Kivity
We must set the bit before the shift, otherwise the wrong bit gets set.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/paging_tmpl.h |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index da36e48..e07cb2e 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -382,9 +382,9 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
metaphysical = 1;
hugepage_access = walker->pte;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
+   hugepage_access >>= PT_WRITABLE_SHIFT;
if (walker->pte & PT64_NX_MASK)
hugepage_access |= (1 << 2);
-   hugepage_access >>= PT_WRITABLE_SHIFT;
table_gfn = (walker->pte & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
} else {
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 33/50] KVM: MMU: Disable write access on clean large pages

2007-12-23 Thread Avi Kivity
By forcing clean huge pages to be read-only, we have separate roles
for the shadow of a clean large page and the shadow of a dirty large
page.  This is necessary because different ptes will be instantiated
for the two cases, even for read faults.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/paging_tmpl.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index e07cb2e..4538b15 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -382,6 +382,8 @@ static u64 *FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
metaphysical = 1;
hugepage_access = walker->pte;
hugepage_access &= PT_USER_MASK | PT_WRITABLE_MASK;
+   if (!is_dirty_pte(walker->pte))
+   hugepage_access &= ~PT_WRITABLE_MASK;
hugepage_access >>= PT_WRITABLE_SHIFT;
if (walker->pte & PT64_NX_MASK)
hugepage_access |= (1 << 2);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 35/50] KVM: MMU: Move dirty bit updates to a separate function

2007-12-23 Thread Avi Kivity
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/paging_tmpl.h |   23 +++
 1 files changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 4538b15..a0f84a5 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -69,6 +69,17 @@ struct guest_walker {
u32 error_code;
 };
 
+static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
+   int write_fault,
+   pt_element_t *ptep,
+   gfn_t table_gfn)
+{
+   if (write_fault && !is_dirty_pte(*ptep)) {
+   mark_page_dirty(vcpu->kvm, table_gfn);
+   *ptep |= PT_DIRTY_MASK;
+   }
+}
+
 /*
  * Fetch a guest pte for a guest virtual address
  */
@@ -144,10 +155,8 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
-   if (write_fault && !is_dirty_pte(*ptep)) {
-   mark_page_dirty(vcpu->kvm, table_gfn);
-   *ptep |= PT_DIRTY_MASK;
-   }
+   FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
+   table_gfn);
break;
}
 
@@ -157,10 +166,8 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
>> PAGE_SHIFT;
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
-   if (write_fault && !is_dirty_pte(*ptep)) {
-   mark_page_dirty(vcpu->kvm, table_gfn);
-   *ptep |= PT_DIRTY_MASK;
-   }
+   FNAME(update_dirty_bit)(vcpu, write_fault, ptep,
+   table_gfn);
break;
}
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 36/50] KVM: MMU: When updating the dirty bit, inform the mmu about it

2007-12-23 Thread Avi Kivity
Since the mmu uses different shadow pages for dirty large pages and clean
large pages, this allows the mmu to drop ptes that are now invalid.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/paging_tmpl.h |5 +
 1 files changed, 5 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index a0f84a5..a9e687b 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -74,9 +74,14 @@ static void FNAME(update_dirty_bit)(struct kvm_vcpu *vcpu,
pt_element_t *ptep,
gfn_t table_gfn)
 {
+   gpa_t pte_gpa;
+
if (write_fault && !is_dirty_pte(*ptep)) {
mark_page_dirty(vcpu->kvm, table_gfn);
*ptep |= PT_DIRTY_MASK;
+   pte_gpa = ((gpa_t)table_gfn << PAGE_SHIFT);
+   pte_gpa += offset_in_page(ptep);
+   kvm_mmu_pte_write(vcpu, pte_gpa, (u8 *)ptep, sizeof(*ptep));
}
 }
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 29/50] KVM: MMU: Clean up MMU functions to take struct kvm when appropriate

2007-12-23 Thread Avi Kivity
From: Anthony Liguori <[EMAIL PROTECTED]>

Some of the MMU functions take a struct kvm_vcpu even though they affect all
VCPUs.  This patch cleans up some of them to instead take a struct kvm.  This
makes things a bit more clear.

The main thing that was confusing me was whether certain functions need to be
called on all VCPUs.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |   18 +-
 drivers/kvm/paging_tmpl.h |4 ++--
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index ece0aa4..a5ca945 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -606,7 +606,7 @@ static void mmu_page_remove_parent_pte(struct kvm_mmu_page 
*page,
BUG();
 }
 
-static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm_vcpu *vcpu,
+static struct kvm_mmu_page *kvm_mmu_lookup_page(struct kvm *kvm,
gfn_t gfn)
 {
unsigned index;
@@ -616,7 +616,7 @@ static struct kvm_mmu_page *kvm_mmu_lookup_page(struct 
kvm_vcpu *vcpu,
 
pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-   bucket = &vcpu->kvm->mmu_page_hash[index];
+   bucket = &kvm->mmu_page_hash[index];
hlist_for_each_entry(page, node, bucket, hash_link)
if (page->gfn == gfn && !page->role.metaphysical) {
pgprintk("%s: found role %x\n",
@@ -782,7 +782,7 @@ void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int 
kvm_nr_mmu_pages)
kvm->n_alloc_mmu_pages = kvm_nr_mmu_pages;
 }
 
-static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
+static int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 {
unsigned index;
struct hlist_head *bucket;
@@ -793,25 +793,25 @@ static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, 
gfn_t gfn)
pgprintk("%s: looking for gfn %lx\n", __FUNCTION__, gfn);
r = 0;
index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
-   bucket = &vcpu->kvm->mmu_page_hash[index];
+   bucket = &kvm->mmu_page_hash[index];
hlist_for_each_entry_safe(page, node, n, bucket, hash_link)
if (page->gfn == gfn && !page->role.metaphysical) {
pgprintk("%s: gfn %lx role %x\n", __FUNCTION__, gfn,
 page->role.word);
-   kvm_mmu_zap_page(vcpu->kvm, page);
+   kvm_mmu_zap_page(kvm, page);
r = 1;
}
return r;
 }
 
-static void mmu_unshadow(struct kvm_vcpu *vcpu, gfn_t gfn)
+static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
struct kvm_mmu_page *page;
 
-   while ((page = kvm_mmu_lookup_page(vcpu, gfn)) != NULL) {
+   while ((page = kvm_mmu_lookup_page(kvm, gfn)) != NULL) {
pgprintk("%s: zap %lx %x\n",
 __FUNCTION__, gfn, page->role.word);
-   kvm_mmu_zap_page(vcpu->kvm, page);
+   kvm_mmu_zap_page(kvm, page);
}
 }
 
@@ -1299,7 +1299,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, 
gva_t gva)
 {
gpa_t gpa = vcpu->mmu.gva_to_gpa(vcpu, gva);
 
-   return kvm_mmu_unprotect_page(vcpu, gpa >> PAGE_SHIFT);
+   return kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
 }
 
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 447d2c3..4f6edf8 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -268,11 +268,11 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 
spte |= PT_WRITABLE_MASK;
if (user_fault) {
-   mmu_unshadow(vcpu, gfn);
+   mmu_unshadow(vcpu->kvm, gfn);
goto unshadowed;
}
 
-   shadow = kvm_mmu_lookup_page(vcpu, gfn);
+   shadow = kvm_mmu_lookup_page(vcpu->kvm, gfn);
if (shadow) {
pgprintk("%s: found shadow page for %lx, marking ro\n",
 __FUNCTION__, gfn);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 40/50] KVM: Move apic timer interrupt backlog processing to common code

2007-12-23 Thread Avi Kivity
Beside the obvious goodness of making code more common, this prevents
a livelock with the next patch which moves interrupt injection out of the
critical section.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |2 ++
 drivers/kvm/svm.c  |1 -
 drivers/kvm/vmx.c  |1 -
 3 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 03d6069..c94d4df 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -2144,6 +2144,8 @@ again:
if (unlikely(r))
goto out;
 
+   kvm_inject_pending_timer_irqs(vcpu);
+
preempt_disable();
 
kvm_x86_ops->prepare_guest_switch(vcpu);
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index 746a377..4ff2922 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -1355,7 +1355,6 @@ static void svm_intr_assist(struct kvm_vcpu *vcpu)
struct vmcb *vmcb = svm->vmcb;
int intr_vector = -1;
 
-   kvm_inject_pending_timer_irqs(vcpu);
if ((vmcb->control.exit_int_info & SVM_EVTINJ_VALID) &&
((vmcb->control.exit_int_info & SVM_EVTINJ_TYPE_MASK) == 0)) {
intr_vector = vmcb->control.exit_int_info &
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 1336174..be6846d 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -2191,7 +2191,6 @@ static void vmx_intr_assist(struct kvm_vcpu *vcpu)
int has_ext_irq, interrupt_window_open;
int vector;
 
-   kvm_inject_pending_timer_irqs(vcpu);
update_tpr_threshold(vcpu);
 
has_ext_irq = kvm_cpu_has_interrupt(vcpu);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 18/50] KVM: VMX: Don't clear the vmcs if the vcpu is not loaded on any processor

2007-12-23 Thread Avi Kivity
Noted by Eddie Dong.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index d32e63d..8929575 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -225,7 +225,9 @@ static void __vcpu_clear(void *arg)
 
 static void vcpu_clear(struct vcpu_vmx *vmx)
 {
-   if (vmx->vcpu.cpu != raw_smp_processor_id() && vmx->vcpu.cpu != -1)
+   if (vmx->vcpu.cpu == -1)
+   return;
+   if (vmx->vcpu.cpu != raw_smp_processor_id())
smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
 vmx, 0, 1);
else
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 38/50] KVM: apic round robin cleanup

2007-12-23 Thread Avi Kivity
From: Qing He <[EMAIL PROTECTED]>

If no apic is enabled in the bitmap of an interrupt delivery with delivery
mode of lowest priority, a warning should be reported rather than select
a fallback vcpu

Signed-off-by: Qing He <[EMAIL PROTECTED]>
Signed-off-by: Eddie (Yaozu) Dong <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/lapic.c |   13 +++--
 1 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index e15b42e..8840f9d 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -395,10 +395,9 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int 
delivery_mode,
 struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 vector,
   unsigned long bitmap)
 {
-   int vcpu_id;
int last;
int next;
-   struct kvm_lapic *apic;
+   struct kvm_lapic *apic = NULL;
 
last = kvm->round_robin_prev_vcpu;
next = last;
@@ -415,14 +414,8 @@ struct kvm_lapic *kvm_apic_round_robin(struct kvm *kvm, u8 
vector,
} while (next != last);
kvm->round_robin_prev_vcpu = next;
 
-   if (!apic) {
-   vcpu_id = ffs(bitmap) - 1;
-   if (vcpu_id < 0) {
-   vcpu_id = 0;
-   printk(KERN_DEBUG "vcpu not ready for 
apic_round_robin\n");
-   }
-   apic = kvm->vcpus[vcpu_id]->apic;
-   }
+   if (!apic)
+   printk(KERN_DEBUG "vcpu not ready for apic_round_robin\n");
 
return apic;
 }
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 24/50] KVM: Hoist kvm_create_lapic() into kvm_vcpu_init()

2007-12-23 Thread Avi Kivity
From: Rusty Russell <[EMAIL PROTECTED]>

Move kvm_create_lapic() into kvm_vcpu_init(), rather than having svm
and vmx do it.  And make it return the error rather than a fairly
random -ENOMEM.

This also solves the problem that neither svm.c nor vmx.c actually
handles the error path properly.

Signed-off-by: Rusty Russell <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   10 +-
 drivers/kvm/svm.c  |6 --
 drivers/kvm/vmx.c  |6 --
 3 files changed, 9 insertions(+), 13 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 760753d..0a04b75 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -255,14 +255,22 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, 
unsigned id)
if (r < 0)
goto fail_free_pio_data;
 
+   if (irqchip_in_kernel(kvm)) {
+   r = kvm_create_lapic(vcpu);
+   if (r < 0)
+   goto fail_mmu_destroy;
+   }
+
return 0;
 
+fail_mmu_destroy:
+   kvm_mmu_destroy(vcpu);
 fail_free_pio_data:
free_page((unsigned long)vcpu->pio_data);
 fail_free_run:
free_page((unsigned long)vcpu->run);
 fail:
-   return -ENOMEM;
+   return r;
 }
 EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
index a0eef78..f2278d0 100644
--- a/drivers/kvm/svm.c
+++ b/drivers/kvm/svm.c
@@ -588,12 +588,6 @@ static struct kvm_vcpu *svm_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (err)
goto free_svm;
 
-   if (irqchip_in_kernel(kvm)) {
-   err = kvm_create_lapic(&svm->vcpu);
-   if (err < 0)
-   goto free_svm;
-   }
-
page = alloc_page(GFP_KERNEL);
if (!page) {
err = -ENOMEM;
diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index ce15e51..718d1f4 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -2431,12 +2431,6 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
if (err)
goto free_vcpu;
 
-   if (irqchip_in_kernel(kvm)) {
-   err = kvm_create_lapic(&vmx->vcpu);
-   if (err < 0)
-   goto free_vcpu;
-   }
-
vmx->guest_msrs = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (!vmx->guest_msrs) {
err = -ENOMEM;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 23/50] KVM: Add kvm_free_lapic() to pair with kvm_create_lapic()

2007-12-23 Thread Avi Kivity
From: Rusty Russell <[EMAIL PROTECTED]>

Instead of the asymetry of kvm_free_apic, implement kvm_free_lapic().
And guess what?  I found a minor bug: we don't need to hrtimer_cancel()
from kvm_main.c, because we do that in kvm_free_apic().

Also:
1) kvm_vcpu_uninit should be the reverse order from kvm_vcpu_init.
2) Don't set apic->regs_page to zero before freeing apic.

Signed-off-by: Rusty Russell <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/irq.h  |2 +-
 drivers/kvm/kvm_main.c |4 +---
 drivers/kvm/lapic.c|   19 +--
 3 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/kvm/irq.h b/drivers/kvm/irq.h
index 11fc014..508280e 100644
--- a/drivers/kvm/irq.h
+++ b/drivers/kvm/irq.h
@@ -139,7 +139,7 @@ int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu);
 int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu);
 int kvm_create_lapic(struct kvm_vcpu *vcpu);
 void kvm_lapic_reset(struct kvm_vcpu *vcpu);
-void kvm_free_apic(struct kvm_lapic *apic);
+void kvm_free_lapic(struct kvm_vcpu *vcpu);
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu);
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8);
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index d220e63..760753d 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -268,10 +268,8 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
+   kvm_free_lapic(vcpu);
kvm_mmu_destroy(vcpu);
-   if (vcpu->apic)
-   hrtimer_cancel(&vcpu->apic->timer.dev);
-   kvm_free_apic(vcpu->apic);
free_page((unsigned long)vcpu->pio_data);
free_page((unsigned long)vcpu->run);
 }
diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index 238fcad..8e8dab0 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -762,19 +762,17 @@ static int apic_mmio_range(struct kvm_io_device *this, 
gpa_t addr)
return ret;
 }
 
-void kvm_free_apic(struct kvm_lapic *apic)
+void kvm_free_lapic(struct kvm_vcpu *vcpu)
 {
-   if (!apic)
+   if (!vcpu->apic)
return;
 
-   hrtimer_cancel(&apic->timer.dev);
+   hrtimer_cancel(&vcpu->apic->timer.dev);
 
-   if (apic->regs_page) {
-   __free_page(apic->regs_page);
-   apic->regs_page = 0;
-   }
+   if (vcpu->apic->regs_page)
+   __free_page(vcpu->apic->regs_page);
 
-   kfree(apic);
+   kfree(vcpu->apic);
 }
 
 /*
@@ -962,7 +960,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
if (apic->regs_page == NULL) {
printk(KERN_ERR "malloc apic regs error for vcpu %x\n",
   vcpu->vcpu_id);
-   goto nomem;
+   goto nomem_free_apic;
}
apic->regs = page_address(apic->regs_page);
memset(apic->regs, 0, PAGE_SIZE);
@@ -980,8 +978,9 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu)
apic->dev.private = apic;
 
return 0;
+nomem_free_apic:
+   kfree(apic);
 nomem:
-   kvm_free_apic(apic);
return -ENOMEM;
 }
 EXPORT_SYMBOL_GPL(kvm_create_lapic);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 31/50] KVM: Move guest pte dirty bit management to the guest pagetable walker

2007-12-23 Thread Avi Kivity
This is more consistent with the accessed bit management, and makes the dirty
bit available earlier for other purposes.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/mmu.c |5 +
 drivers/kvm/paging_tmpl.h |   31 ---
 2 files changed, 13 insertions(+), 23 deletions(-)

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index d046ba8..e6616a6 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -199,6 +199,11 @@ static int is_writeble_pte(unsigned long pte)
return pte & PT_WRITABLE_MASK;
 }
 
+static int is_dirty_pte(unsigned long pte)
+{
+   return pte & PT_DIRTY_MASK;
+}
+
 static int is_io_pte(unsigned long pte)
 {
return pte & PT_SHADOW_IO_MARK;
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 8e1e4ca..da36e48 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -144,6 +144,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
if (walker->level == PT_PAGE_TABLE_LEVEL) {
walker->gfn = (*ptep & PT_BASE_ADDR_MASK)
>> PAGE_SHIFT;
+   if (write_fault && !is_dirty_pte(*ptep)) {
+   mark_page_dirty(vcpu->kvm, table_gfn);
+   *ptep |= PT_DIRTY_MASK;
+   }
break;
}
 
@@ -153,6 +157,10 @@ static int FNAME(walk_addr)(struct guest_walker *walker,
walker->gfn = (*ptep & PT_DIR_BASE_ADDR_MASK)
>> PAGE_SHIFT;
walker->gfn += PT_INDEX(addr, PT_PAGE_TABLE_LEVEL);
+   if (write_fault && !is_dirty_pte(*ptep)) {
+   mark_page_dirty(vcpu->kvm, table_gfn);
+   *ptep |= PT_DIRTY_MASK;
+   }
break;
}
 
@@ -194,12 +202,6 @@ err:
return 0;
 }
 
-static void FNAME(mark_pagetable_dirty)(struct kvm *kvm,
-   struct guest_walker *walker)
-{
-   mark_page_dirty(kvm, walker->table_gfn[walker->level - 1]);
-}
-
 static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
  u64 *shadow_pte,
  gpa_t gaddr,
@@ -221,23 +223,6 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
 __FUNCTION__, *shadow_pte, (u64)gpte, access_bits,
 write_fault, user_fault, gfn);
 
-   if (write_fault && !dirty) {
-   pt_element_t *guest_ent, *tmp = NULL;
-
-   if (walker->ptep)
-   guest_ent = walker->ptep;
-   else {
-   tmp = kmap_atomic(walker->page, KM_USER0);
-   guest_ent = &tmp[walker->index];
-   }
-
-   *guest_ent |= PT_DIRTY_MASK;
-   if (!walker->ptep)
-   kunmap_atomic(tmp, KM_USER0);
-   dirty = 1;
-   FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
-   }
-
/*
 * We don't set the accessed bit, since we sometimes want to see
 * whether the guest actually used the pte (in order to detect
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 25/50] KVM: Remove gratuitous casts from lapic.c

2007-12-23 Thread Avi Kivity
From: Rusty Russell <[EMAIL PROTECTED]>

Since vcpu->apic is of the correct type, there's no need to cast.

Signed-off-by: Rusty Russell <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/lapic.c |   10 +-
 1 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/kvm/lapic.c b/drivers/kvm/lapic.c
index 8e8dab0..554e73a 100644
--- a/drivers/kvm/lapic.c
+++ b/drivers/kvm/lapic.c
@@ -172,7 +172,7 @@ static inline int apic_find_highest_irr(struct kvm_lapic 
*apic)
 
 int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu)
 {
-   struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+   struct kvm_lapic *apic = vcpu->apic;
int highest_irr;
 
if (!apic)
@@ -783,7 +783,7 @@ void kvm_free_lapic(struct kvm_vcpu *vcpu)
 
 void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long cr8)
 {
-   struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+   struct kvm_lapic *apic = vcpu->apic;
 
if (!apic)
return;
@@ -792,7 +792,7 @@ void kvm_lapic_set_tpr(struct kvm_vcpu *vcpu, unsigned long 
cr8)
 
 u64 kvm_lapic_get_cr8(struct kvm_vcpu *vcpu)
 {
-   struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+   struct kvm_lapic *apic = vcpu->apic;
u64 tpr;
 
if (!apic)
@@ -805,7 +805,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_get_cr8);
 
 void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 {
-   struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+   struct kvm_lapic *apic = vcpu->apic;
 
if (!apic) {
value |= MSR_IA32_APICBASE_BSP;
@@ -882,7 +882,7 @@ EXPORT_SYMBOL_GPL(kvm_lapic_reset);
 
 int kvm_lapic_enabled(struct kvm_vcpu *vcpu)
 {
-   struct kvm_lapic *apic = (struct kvm_lapic *)vcpu->apic;
+   struct kvm_lapic *apic = vcpu->apic;
int ret = 0;
 
if (!apic)
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 12/50] KVM: x86 emulator: split some decoding into functions for readability

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

To improve readability, move push, writeback, and grp 1a/2/3/4/5/9 emulation
parts into functions.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |  451 ++--
 1 files changed, 266 insertions(+), 185 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index cab1719..a108736 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -907,6 +907,244 @@ done:
return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
 }
 
+static inline void emulate_push(struct x86_emulate_ctxt *ctxt)
+{
+   struct decode_cache *c = &ctxt->decode;
+
+   c->dst.type  = OP_MEM;
+   c->dst.bytes = c->op_bytes;
+   c->dst.val = c->src.val;
+   register_address_increment(c->regs[VCPU_REGS_RSP], -c->op_bytes);
+   c->dst.ptr = (void *) register_address(ctxt->ss_base,
+  c->regs[VCPU_REGS_RSP]);
+}
+
+static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
+   struct x86_emulate_ops *ops)
+{
+   struct decode_cache *c = &ctxt->decode;
+   int rc;
+
+   /* 64-bit mode: POP always pops a 64-bit operand. */
+
+   if (ctxt->mode == X86EMUL_MODE_PROT64)
+   c->dst.bytes = 8;
+
+   rc = ops->read_std(register_address(ctxt->ss_base,
+   c->regs[VCPU_REGS_RSP]),
+  &c->dst.val, c->dst.bytes, ctxt->vcpu);
+   if (rc != 0)
+   return rc;
+
+   register_address_increment(c->regs[VCPU_REGS_RSP], c->dst.bytes);
+
+   return 0;
+}
+
+static inline void emulate_grp2(struct decode_cache *c, unsigned long *_eflags)
+{
+   switch (c->modrm_reg) {
+   case 0: /* rol */
+   emulate_2op_SrcB("rol", c->src, c->dst, *_eflags);
+   break;
+   case 1: /* ror */
+   emulate_2op_SrcB("ror", c->src, c->dst, *_eflags);
+   break;
+   case 2: /* rcl */
+   emulate_2op_SrcB("rcl", c->src, c->dst, *_eflags);
+   break;
+   case 3: /* rcr */
+   emulate_2op_SrcB("rcr", c->src, c->dst, *_eflags);
+   break;
+   case 4: /* sal/shl */
+   case 6: /* sal/shl */
+   emulate_2op_SrcB("sal", c->src, c->dst, *_eflags);
+   break;
+   case 5: /* shr */
+   emulate_2op_SrcB("shr", c->src, c->dst, *_eflags);
+   break;
+   case 7: /* sar */
+   emulate_2op_SrcB("sar", c->src, c->dst, *_eflags);
+   break;
+   }
+}
+
+static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
+  struct x86_emulate_ops *ops,
+  unsigned long *_eflags)
+{
+   struct decode_cache *c = &ctxt->decode;
+   int rc = 0;
+
+   switch (c->modrm_reg) {
+   case 0 ... 1:   /* test */
+   /*
+* Special case in Grp3: test has an immediate
+* source operand.
+*/
+   c->src.type = OP_IMM;
+   c->src.ptr = (unsigned long *)c->eip;
+   c->src.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+   if (c->src.bytes == 8)
+   c->src.bytes = 4;
+   switch (c->src.bytes) {
+   case 1:
+   c->src.val = insn_fetch(s8, 1, c->eip);
+   break;
+   case 2:
+   c->src.val = insn_fetch(s16, 2, c->eip);
+   break;
+   case 4:
+   c->src.val = insn_fetch(s32, 4, c->eip);
+   break;
+   }
+   emulate_2op_SrcV("test", c->src, c->dst, *_eflags);
+   break;
+   case 2: /* not */
+   c->dst.val = ~c->dst.val;
+   break;
+   case 3: /* neg */
+   emulate_1op("neg", c->dst, *_eflags);
+   break;
+   default:
+   DPRINTF("Cannot emulate %02x\n", c->b);
+   rc = X86EMUL_UNHANDLEABLE;
+   break;
+   }
+done:
+   return rc;
+}
+
+static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
+  struct x86_emulate_ops *ops,
+  unsigned long *_eflags,
+  int *no_wb)
+{
+   struct decode_cache *c = &ctxt->decode;
+   int rc;
+
+   switch (c->modrm_reg) {
+   case 0: /* inc */
+   emulate

[kvm-devel] [PATCH 14/50] KVM: x86 emulator: Remove no_wb, use dst.type = OP_NONE instead

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Remove no_wb, use dst.type = OP_NONE instead, idea stolen from xen-3.1

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   76 ++--
 drivers/kvm/x86_emulate.h |2 +-
 2 files changed, 25 insertions(+), 53 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 45beeb9..8b0186f 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1016,8 +1016,7 @@ done:
 }
 
 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
-  struct x86_emulate_ops *ops,
-  int *no_wb)
+  struct x86_emulate_ops *ops)
 {
struct decode_cache *c = &ctxt->decode;
int rc;
@@ -1055,7 +1054,7 @@ static inline int emulate_grp45(struct x86_emulate_ctxt 
*ctxt,
c->dst.bytes, ctxt->vcpu);
if (rc != 0)
return rc;
-   *no_wb = 1;
+   c->dst.type = OP_NONE;
break;
default:
DPRINTF("Cannot emulate %02x\n", c->b);
@@ -1137,6 +1136,10 @@ static inline int writeback(struct x86_emulate_ctxt 
*ctxt,
ctxt->vcpu);
if (rc != 0)
return rc;
+   break;
+   case OP_NONE:
+   /* no writeback */
+   break;
default:
break;
}
@@ -1147,7 +1150,6 @@ int
 x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
unsigned long cr2 = ctxt->cr2;
-   int no_wb = 0;
u64 msr_data;
unsigned long saved_eip = 0;
struct decode_cache *c = &ctxt->decode;
@@ -1344,18 +1346,16 @@ x86_emulate_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
goto done;
break;
case 0xfe ... 0xff: /* Grp4/Grp5 */
-   rc = emulate_grp45(ctxt, ops, &no_wb);
+   rc = emulate_grp45(ctxt, ops);
if (rc != 0)
goto done;
break;
}
 
 writeback:
-   if (!no_wb) {
-   rc = writeback(ctxt, ops);
-   if (rc != 0)
-   goto done;
-   }
+   rc = writeback(ctxt, ops);
+   if (rc != 0)
+   goto done;
 
/* Commit shadow register state. */
memcpy(ctxt->vcpu->regs, c->regs, sizeof c->regs);
@@ -1395,7 +1395,7 @@ special_insn:
 
register_address_increment(c->regs[VCPU_REGS_RSP],
   c->op_bytes);
-   no_wb = 1; /* Disable writeback. */
+   c->dst.type = OP_NONE;  /* Disable writeback. */
break;
case 0x6a: /* push imm8 */
c->src.val = 0L;
@@ -1538,7 +1538,7 @@ special_insn:
case 0xe9: /* jmp rel */
case 0xeb: /* jmp rel short */
JMP_REL(c->src.val);
-   no_wb = 1; /* Disable writeback. */
+   c->dst.type = OP_NONE; /* Disable writeback. */
break;
 
 
@@ -1548,8 +1548,6 @@ special_insn:
 twobyte_insn:
switch (c->b) {
case 0x01: /* lgdt, lidt, lmsw */
-   /* Disable writeback. */
-   no_wb = 1;
switch (c->modrm_reg) {
u16 size;
unsigned long address;
@@ -1604,56 +1602,30 @@ twobyte_insn:
default:
goto cannot_emulate;
}
+   /* Disable writeback. */
+   c->dst.type = OP_NONE;
break;
case 0x21: /* mov from dr to reg */
-   no_wb = 1;
if (c->modrm_mod != 3)
goto cannot_emulate;
rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
+   if (rc)
+   goto cannot_emulate;
+   c->dst.type = OP_NONE;  /* no writeback */
break;
case 0x23: /* mov from reg to dr */
-   no_wb = 1;
if (c->modrm_mod != 3)
goto cannot_emulate;
rc = emulator_set_dr(ctxt, c->modrm_reg,
 c->regs[c->modrm_rm]);
+   if (rc)
+   goto cannot_emulate;
+   c->dst.type = OP_NONE;  /* no writeback */
break;
case 0x40 ... 0x4f: /* cmov */
c->dst.val = c->dst.orig_val = c->src.val;
-   no_wb = 1;
-   /*
-* First, assume we're decod

[kvm-devel] [PATCH 04/50] KVM: x86 emulator: move all x86_emulate_memop() to a structure

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Move all x86_emulate_memop() common variables between decode and execute to a
structure decode_cache.  This will help in later separating decode and
emulate.

struct decode_cache {
u8 twobyte;
u8 b;
u8 lock_prefix;
u8 rep_prefix;
u8 op_bytes;
u8 ad_bytes;
struct operand src;
struct operand dst;
unsigned long *override_base;
unsigned int d;
unsigned long regs[NR_VCPU_REGS];
unsigned long eip;
/* modrm */
u8 modrm;
u8 modrm_mod;
u8 modrm_reg;
u8 modrm_rm;
u8 use_modrm_ea;
unsigned long modrm_ea;
unsigned long modrm_val;
   };

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |  919 -
 drivers/kvm/x86_emulate.h |   34 ++
 2 files changed, 518 insertions(+), 435 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 9ea82f8..f946182 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -222,13 +222,6 @@ static u16 twobyte_table[256] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-/* Type, address-of, and value of an instruction's operand. */
-struct operand {
-   enum { OP_REG, OP_MEM, OP_IMM } type;
-   unsigned int bytes;
-   unsigned long val, orig_val, *ptr;
-};
-
 /* EFLAGS bit definitions. */
 #define EFLG_OF (1<<11)
 #define EFLG_DF (1<<10)
@@ -431,24 +424,26 @@ struct operand {
 
 /* Access/update address held in a register, based on addressing mode. */
 #define address_mask(reg)  \
-   ((ad_bytes == sizeof(unsigned long)) ?  \
-   (reg) : ((reg) & ((1UL << (ad_bytes << 3)) - 1)))
+   ((c->ad_bytes == sizeof(unsigned long)) ?   \
+   (reg) : ((reg) & ((1UL << (c->ad_bytes << 3)) - 1)))
 #define register_address(base, reg) \
((base) + address_mask(reg))
 #define register_address_increment(reg, inc)\
do {\
/* signed type ensures sign extension to long */\
int _inc = (inc);   \
-   if ( ad_bytes == sizeof(unsigned long) )\
+   if (c->ad_bytes == sizeof(unsigned long))   \
(reg) += _inc;  \
else\
-   (reg) = ((reg) & ~((1UL << (ad_bytes << 3)) - 1)) | \
-  (((reg) + _inc) & ((1UL << (ad_bytes << 3)) - 1)); \
+   (reg) = ((reg) &\
+~((1UL << (c->ad_bytes << 3)) - 1)) |  \
+   (((reg) + _inc) &   \
+((1UL << (c->ad_bytes << 3)) - 1));\
} while (0)
 
 #define JMP_REL(rel)   \
do {\
-   register_address_increment(_eip, rel);  \
+   register_address_increment(c->eip, rel);\
} while (0)
 
 /*
@@ -524,39 +519,35 @@ static int test_cc(unsigned int condition, unsigned int 
flags)
 int
 x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
-   unsigned d;
-   u8 b, sib, twobyte = 0, rex_prefix = 0;
-   u8 modrm, modrm_mod = 0, modrm_reg = 0, modrm_rm = 0;
-   unsigned long *override_base = NULL;
-   unsigned int op_bytes, ad_bytes, lock_prefix = 0, rep_prefix = 0, i;
+   struct decode_cache *c = &ctxt->decode;
+   u8 sib, rex_prefix = 0;
+   unsigned int i;
int rc = 0;
-   struct operand src, dst;
unsigned long cr2 = ctxt->cr2;
int mode = ctxt->mode;
-   unsigned long modrm_ea;
-   int use_modrm_ea, index_reg = 0, base_reg = 0, scale, rip_relative = 0;
+   int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
int no_wb = 0;
u64 msr_data;
 
/* Shadow copy of register state. Committed on successful emulation. */
-   unsigned long _regs[NR_VCPU_REGS];
-   unsigned long _eip = ctxt->vcpu->rip, _eflags = ctxt->eflags;
-   unsigned long modrm_v

[kvm-devel] [PATCH 19/50] KVM: VMX: Simplify vcpu_clear()

2007-12-23 Thread Avi Kivity
Now that smp_call_function_single() knows how to call a function on the
current cpu, there's no need to check explicitly.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |6 +-
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index 8929575..c87f52b 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -227,11 +227,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx)
 {
if (vmx->vcpu.cpu == -1)
return;
-   if (vmx->vcpu.cpu != raw_smp_processor_id())
-   smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear,
-vmx, 0, 1);
-   else
-   __vcpu_clear(vmx);
+   smp_call_function_single(vmx->vcpu.cpu, __vcpu_clear, vmx, 0, 1);
vmx->launched = 0;
 }
 
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 13/50] KVM: x86 emulator: remove _eflags and use directly ctxt->eflags.

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Remove _eflags and use directly ctxt->eflags. Caching eflags is not needed as
it is restored to vcpu by kvm_main.c:emulate_instruction() from ctxt->eflags
only if emulation doesn't fail.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |  121 ++---
 1 files changed, 59 insertions(+), 62 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index a108736..45beeb9 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -941,37 +941,37 @@ static inline int emulate_grp1a(struct x86_emulate_ctxt 
*ctxt,
return 0;
 }
 
-static inline void emulate_grp2(struct decode_cache *c, unsigned long *_eflags)
+static inline void emulate_grp2(struct x86_emulate_ctxt *ctxt)
 {
+   struct decode_cache *c = &ctxt->decode;
switch (c->modrm_reg) {
case 0: /* rol */
-   emulate_2op_SrcB("rol", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("rol", c->src, c->dst, ctxt->eflags);
break;
case 1: /* ror */
-   emulate_2op_SrcB("ror", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("ror", c->src, c->dst, ctxt->eflags);
break;
case 2: /* rcl */
-   emulate_2op_SrcB("rcl", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("rcl", c->src, c->dst, ctxt->eflags);
break;
case 3: /* rcr */
-   emulate_2op_SrcB("rcr", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("rcr", c->src, c->dst, ctxt->eflags);
break;
case 4: /* sal/shl */
case 6: /* sal/shl */
-   emulate_2op_SrcB("sal", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("sal", c->src, c->dst, ctxt->eflags);
break;
case 5: /* shr */
-   emulate_2op_SrcB("shr", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("shr", c->src, c->dst, ctxt->eflags);
break;
case 7: /* sar */
-   emulate_2op_SrcB("sar", c->src, c->dst, *_eflags);
+   emulate_2op_SrcB("sar", c->src, c->dst, ctxt->eflags);
break;
}
 }
 
 static inline int emulate_grp3(struct x86_emulate_ctxt *ctxt,
-  struct x86_emulate_ops *ops,
-  unsigned long *_eflags)
+  struct x86_emulate_ops *ops)
 {
struct decode_cache *c = &ctxt->decode;
int rc = 0;
@@ -998,13 +998,13 @@ static inline int emulate_grp3(struct x86_emulate_ctxt 
*ctxt,
c->src.val = insn_fetch(s32, 4, c->eip);
break;
}
-   emulate_2op_SrcV("test", c->src, c->dst, *_eflags);
+   emulate_2op_SrcV("test", c->src, c->dst, ctxt->eflags);
break;
case 2: /* not */
c->dst.val = ~c->dst.val;
break;
case 3: /* neg */
-   emulate_1op("neg", c->dst, *_eflags);
+   emulate_1op("neg", c->dst, ctxt->eflags);
break;
default:
DPRINTF("Cannot emulate %02x\n", c->b);
@@ -1017,7 +1017,6 @@ done:
 
 static inline int emulate_grp45(struct x86_emulate_ctxt *ctxt,
   struct x86_emulate_ops *ops,
-  unsigned long *_eflags,
   int *no_wb)
 {
struct decode_cache *c = &ctxt->decode;
@@ -1025,10 +1024,10 @@ static inline int emulate_grp45(struct x86_emulate_ctxt 
*ctxt,
 
switch (c->modrm_reg) {
case 0: /* inc */
-   emulate_1op("inc", c->dst, *_eflags);
+   emulate_1op("inc", c->dst, ctxt->eflags);
break;
case 1: /* dec */
-   emulate_1op("dec", c->dst, *_eflags);
+   emulate_1op("dec", c->dst, ctxt->eflags);
break;
case 4: /* jmp abs */
if (c->b == 0xff)
@@ -1067,7 +1066,6 @@ static inline int emulate_grp45(struct x86_emulate_ctxt 
*ctxt,
 
 static inline int emulate_grp9(struct x86_emulate_ctxt *ctxt,
   struct x86_emulate_ops *ops,
-  unsigned long *_eflags,
   unsigned long cr2)
 {
struct decode_cache *c = &ctxt->decode;
@@ -1083,7 +1081,7 @@ static inline int emul

[kvm-devel] [PATCH 16/50] KVM: Purify x86_decode_insn() error case management

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

The only valid case is on protected page access, other cases are errors.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   10 +++---
 1 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index b10fd7e..f7566b9 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1251,7 +1251,7 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
u16 error_code,
int no_decode)
 {
-   int r = 0;
+   int r;
 
vcpu->mmio_fault_cr2 = cr2;
kvm_x86_ops->cache_regs(vcpu);
@@ -1294,10 +1294,14 @@ int emulate_instruction(struct kvm_vcpu *vcpu,
get_segment_base(vcpu, VCPU_SREG_FS);
 
r = x86_decode_insn(&vcpu->emulate_ctxt, &emulate_ops);
+   if (r)  {
+   if (kvm_mmu_unprotect_page_virt(vcpu, cr2))
+   return EMULATE_DONE;
+   return EMULATE_FAIL;
+   }
}
 
-   if (r == 0)
-   r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
+   r = x86_emulate_insn(&vcpu->emulate_ctxt, &emulate_ops);
 
if (vcpu->pio.string)
return EMULATE_DO_MMIO;
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 21/50] KVM: Add general accessors to read and write guest memory

2007-12-23 Thread Avi Kivity
From: Izik Eidus <[EMAIL PROTECTED](none)>

Signed-off-by: Izik Eidus <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h  |9 +++
 drivers/kvm/kvm_main.c |  160 +++-
 drivers/kvm/vmx.c  |   43 ++---
 3 files changed, 158 insertions(+), 54 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 80cfb99..1965438 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -559,6 +559,15 @@ extern hpa_t bad_page_address;
 
 gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn);
 struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn);
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+   int len);
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len);
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
+int offset, int len);
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
+   unsigned long len);
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len);
+int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len);
 struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn);
 void mark_page_dirty(struct kvm *kvm, gfn_t gfn);
 
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index ac563fc..3d1972e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -400,22 +400,16 @@ static int load_pdptrs(struct kvm_vcpu *vcpu, unsigned 
long cr3)
gfn_t pdpt_gfn = cr3 >> PAGE_SHIFT;
unsigned offset = ((cr3 & (PAGE_SIZE-1)) >> 5) << 2;
int i;
-   u64 *pdpt;
int ret;
-   struct page *page;
u64 pdpte[ARRAY_SIZE(vcpu->pdptrs)];
 
mutex_lock(&vcpu->kvm->lock);
-   page = gfn_to_page(vcpu->kvm, pdpt_gfn);
-   if (!page) {
+   ret = kvm_read_guest_page(vcpu->kvm, pdpt_gfn, pdpte,
+ offset * sizeof(u64), sizeof(pdpte));
+   if (ret < 0) {
ret = 0;
goto out;
}
-
-   pdpt = kmap_atomic(page, KM_USER0);
-   memcpy(pdpte, pdpt+offset, sizeof(pdpte));
-   kunmap_atomic(pdpt, KM_USER0);
-
for (i = 0; i < ARRAY_SIZE(pdpte); ++i) {
if ((pdpte[i] & 1) && (pdpte[i] & 0xfff001e6ull)) {
ret = 0;
@@ -962,6 +956,127 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn)
 }
 EXPORT_SYMBOL_GPL(gfn_to_page);
 
+static int next_segment(unsigned long len, int offset)
+{
+   if (len > PAGE_SIZE - offset)
+   return PAGE_SIZE - offset;
+   else
+   return len;
+}
+
+int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset,
+   int len)
+{
+   void *page_virt;
+   struct page *page;
+
+   page = gfn_to_page(kvm, gfn);
+   if (!page)
+   return -EFAULT;
+   page_virt = kmap_atomic(page, KM_USER0);
+
+   memcpy(data, page_virt + offset, len);
+
+   kunmap_atomic(page_virt, KM_USER0);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest_page);
+
+int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len)
+{
+   gfn_t gfn = gpa >> PAGE_SHIFT;
+   int seg;
+   int offset = offset_in_page(gpa);
+   int ret;
+
+   while ((seg = next_segment(len, offset)) != 0) {
+   ret = kvm_read_guest_page(kvm, gfn, data, offset, seg);
+   if (ret < 0)
+   return ret;
+   offset = 0;
+   len -= seg;
+   data += seg;
+   ++gfn;
+   }
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_read_guest);
+
+int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data,
+int offset, int len)
+{
+   void *page_virt;
+   struct page *page;
+
+   page = gfn_to_page(kvm, gfn);
+   if (!page)
+   return -EFAULT;
+   page_virt = kmap_atomic(page, KM_USER0);
+
+   memcpy(page_virt + offset, data, len);
+
+   kunmap_atomic(page_virt, KM_USER0);
+   mark_page_dirty(kvm, gfn);
+   return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_write_guest_page);
+
+int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data,
+   unsigned long len)
+{
+   gfn_t gfn = gpa >> PAGE_SHIFT;
+   int seg;
+   int offset = offset_in_page(gpa);
+   int ret;
+
+   while ((seg = next_segment(len, offset)) != 0) {
+   ret = kvm_write_guest_page(kvm, gfn, data, offset, seg);
+   if (ret < 0)
+   return ret;
+   offset = 0;
+   len -= seg;
+   data += seg;
+   ++gfn;
+   }
+   return 0;
+}
+
+int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len)
+{

[kvm-devel] [PATCH 17/50] KVM: x86 emulator: Any legacy prefix after a REX prefix nullifies its effect

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

This patch modifies the management of the REX prefix according to the behavior
I saw in Xen 3.1.  In Xen, this modification was introduced by
Jan Beulich.

http://lists.xensource.com/archives/html/xen-changelog/2007-01/msg00081.html

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   24 +++-
 1 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index fe50317..e6b213b 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -521,7 +521,6 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
 {
struct decode_cache *c = &ctxt->decode;
u8 sib, rex_prefix = 0;
-   unsigned int i;
int rc = 0;
int mode = ctxt->mode;
int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
@@ -551,7 +550,7 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
}
 
/* Legacy prefixes. */
-   for (i = 0; i < 8; i++) {
+   for (;;) {
switch (c->b = insn_fetch(u8, 1, c->eip)) {
case 0x66:  /* operand-size override */
c->op_bytes ^= 6;   /* switch between 2/4 bytes */
@@ -582,6 +581,11 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
case 0x36:  /* SS override */
c->override_base = &ctxt->ss_base;
break;
+   case 0x40 ... 0x4f: /* REX */
+   if (mode != X86EMUL_MODE_PROT64)
+   goto done_prefixes;
+   rex_prefix = c->b;
+   continue;
case 0xf0:  /* LOCK */
c->lock_prefix = 1;
break;
@@ -592,19 +596,21 @@ x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct 
x86_emulate_ops *ops)
default:
goto done_prefixes;
}
+
+   /* Any legacy prefix after a REX prefix nullifies its effect. */
+
+   rex_prefix = 0;
}
 
 done_prefixes:
 
/* REX prefix. */
-   if ((mode == X86EMUL_MODE_PROT64) && ((c->b & 0xf0) == 0x40)) {
-   rex_prefix = c->b;
-   if (c->b & 8)
+   if (rex_prefix) {
+   if (rex_prefix & 8)
c->op_bytes = 8;/* REX.W */
-   c->modrm_reg = (c->b & 4) << 1; /* REX.R */
-   index_reg = (c->b & 2) << 2; /* REX.X */
-   c->modrm_rm = base_reg = (c->b & 1) << 3; /* REG.B */
-   c->b = insn_fetch(u8, 1, c->eip);
+   c->modrm_reg = (rex_prefix & 4) << 1;   /* REX.R */
+   index_reg = (rex_prefix & 2) << 2; /* REX.X */
+   c->modrm_rm = base_reg = (rex_prefix & 1) << 3; /* REG.B */
}
 
/* Opcode byte(s). */
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 15/50] KVM: x86_emulator: no writeback for bt

2007-12-23 Thread Avi Kivity
From: Qing He <[EMAIL PROTECTED]>

Signed-off-by: Qing He <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |1 +
 1 files changed, 1 insertions(+), 0 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index 8b0186f..fe50317 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -1629,6 +1629,7 @@ twobyte_insn:
break;
case 0xa3:
  bt:   /* bt */
+   c->dst.type = OP_NONE;
/* only subword offset */
c->src.val &= (c->dst.bytes << 3) - 1;
emulate_2op_SrcV_nobyte("bt", c->src, c->dst, ctxt->eflags);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 05/50] KVM: x86 emulator: move all decoding process to function x86_decode_insn()

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Split the decoding process into a new function x86_decode_insn().

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/x86_emulate.c |   77 +++--
 1 files changed, 53 insertions(+), 24 deletions(-)

diff --git a/drivers/kvm/x86_emulate.c b/drivers/kvm/x86_emulate.c
index f946182..f20534b 100644
--- a/drivers/kvm/x86_emulate.c
+++ b/drivers/kvm/x86_emulate.c
@@ -517,20 +517,16 @@ static int test_cc(unsigned int condition, unsigned int 
flags)
 }
 
 int
-x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+x86_decode_insn(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
 {
struct decode_cache *c = &ctxt->decode;
u8 sib, rex_prefix = 0;
unsigned int i;
int rc = 0;
-   unsigned long cr2 = ctxt->cr2;
int mode = ctxt->mode;
int index_reg = 0, base_reg = 0, scale, rip_relative = 0;
-   int no_wb = 0;
-   u64 msr_data;
 
/* Shadow copy of register state. Committed on successful emulation. */
-   unsigned long _eflags = ctxt->eflags;
 
memset(c, 0, sizeof(struct decode_cache));
c->eip = ctxt->vcpu->rip;
@@ -622,8 +618,10 @@ done_prefixes:
}
 
/* Unrecognised? */
-   if (c->d == 0)
-   goto cannot_emulate;
+   if (c->d == 0) {
+   DPRINTF("Cannot emulate %02x\n", c->b);
+   return -1;
+   }
}
 
/* ModRM and SIB bytes. */
@@ -776,7 +774,6 @@ done_prefixes:
}
if (c->ad_bytes != 8)
c->modrm_ea = (u32)c->modrm_ea;
-   cr2 = c->modrm_ea;
modrm_done:
;
}
@@ -838,13 +835,6 @@ done_prefixes:
break;
}
c->src.type = OP_MEM;
-   c->src.ptr = (unsigned long *)cr2;
-   c->src.val = 0;
-   if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
-  &c->src.val,
-  c->src.bytes, ctxt->vcpu)) != 0)
-   goto done;
-   c->src.orig_val = c->src.val;
break;
case SrcImm:
c->src.type = OP_IMM;
@@ -877,7 +867,7 @@ done_prefixes:
switch (c->d & DstMask) {
case ImplicitOps:
/* Special instructions do their own operand decoding. */
-   goto special_insn;
+   return 0;
case DstReg:
c->dst.type = OP_REG;
if ((c->d & ByteOp)
@@ -905,14 +895,54 @@ done_prefixes:
}
break;
case DstMem:
-   c->dst.type = OP_MEM;
-   c->dst.ptr = (unsigned long *)cr2;
-   c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
-   c->dst.val = 0;
if ((c->d & ModRM) && c->modrm_mod == 3) {
c->dst.type = OP_REG;
break;
}
+   c->dst.type = OP_MEM;
+   break;
+   }
+
+done:
+   return (rc == X86EMUL_UNHANDLEABLE) ? -1 : 0;
+}
+
+int
+x86_emulate_memop(struct x86_emulate_ctxt *ctxt, struct x86_emulate_ops *ops)
+{
+   unsigned long cr2 = ctxt->cr2;
+   int no_wb = 0;
+   u64 msr_data;
+   unsigned long _eflags = ctxt->eflags;
+   struct decode_cache *c = &ctxt->decode;
+   int rc;
+
+   rc = x86_decode_insn(ctxt, ops);
+   if (rc)
+   return rc;
+
+   if ((c->d & ModRM) && (c->modrm_mod != 3))
+   cr2 = c->modrm_ea;
+
+   if (c->src.type == OP_MEM) {
+   c->src.ptr = (unsigned long *)cr2;
+   c->src.val = 0;
+   if ((rc = ops->read_emulated((unsigned long)c->src.ptr,
+&c->src.val,
+c->src.bytes,
+ctxt->vcpu)) != 0)
+   goto done;
+   c->src.orig_val = c->src.val;
+   }
+
+   if ((c->d & DstMask) == ImplicitOps)
+   goto special_insn;
+
+
+   if (c->dst.type == OP_MEM) {
+   c->dst.ptr = (unsigned long *)cr2;
+   c->dst.bytes = (c->d & ByteOp) ? 1 : c->op_bytes;
+   c->dst.val = 0;
if (c->d & BitOp) {
unsigned long mask = ~(c->dst.bytes * 8 - 1);
 
@@ -925,7 +955,6 @@ done_prefixes:
   

[kvm-devel] [PATCH 09/50] KVM: Allow not-present guest page faults to bypass kvm

2007-12-23 Thread Avi Kivity
There are two classes of page faults trapped by kvm:
 - host page faults, where the fault is needed to allow kvm to install
   the shadow pte or update the guest accessed and dirty bits
 - guest page faults, where the guest has faulted and kvm simply injects
   the fault back into the guest to handle

The second class, guest page faults, is pure overhead.  We can eliminate
some of it on vmx using the following evil trick:
 - when we set up a shadow page table entry, if the corresponding guest pte
   is not present, set up the shadow pte as not present
 - if the guest pte _is_ present, mark the shadow pte as present but also
   set one of the reserved bits in the shadow pte
 - tell the vmx hardware not to trap faults which have the present bit clear

With this, normal page-not-present faults go directly to the guest,
bypassing kvm entirely.

Unfortunately, this trick only works on Intel hardware, as AMD lacks a
way to discriminate among page faults based on error code.  It is also
a little risky since it uses reserved bits which might become unreserved
in the future, so a module parameter is provided to disable it.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |3 ++
 drivers/kvm/kvm_main.c|4 ++-
 drivers/kvm/mmu.c |   89 ++---
 drivers/kvm/paging_tmpl.h |   52 ---
 drivers/kvm/vmx.c |   11 +-
 5 files changed, 122 insertions(+), 37 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index e885b19..7de948e 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -150,6 +150,8 @@ struct kvm_mmu {
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+   void (*prefetch_page)(struct kvm_vcpu *vcpu,
+ struct kvm_mmu_page *page);
hpa_t root_hpa;
int root_level;
int shadow_root_level;
@@ -536,6 +538,7 @@ void kvm_mmu_module_exit(void);
 void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
 int kvm_mmu_create(struct kvm_vcpu *vcpu);
 int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
 
 int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
 void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index fad3a08..da057cf 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -3501,7 +3501,9 @@ int kvm_init_x86(struct kvm_x86_ops *ops, unsigned int 
vcpu_size,
kvm_preempt_ops.sched_in = kvm_sched_in;
kvm_preempt_ops.sched_out = kvm_sched_out;
 
-   return r;
+   kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
+
+   return 0;
 
 out_free:
kmem_cache_destroy(kvm_vcpu_cache);
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index feb5ac9..069ce83 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -156,6 +156,16 @@ static struct kmem_cache *pte_chain_cache;
 static struct kmem_cache *rmap_desc_cache;
 static struct kmem_cache *mmu_page_header_cache;
 
+static u64 __read_mostly shadow_trap_nonpresent_pte;
+static u64 __read_mostly shadow_notrap_nonpresent_pte;
+
+void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
+{
+   shadow_trap_nonpresent_pte = trap_pte;
+   shadow_notrap_nonpresent_pte = notrap_pte;
+}
+EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
+
 static int is_write_protection(struct kvm_vcpu *vcpu)
 {
return vcpu->cr0 & X86_CR0_WP;
@@ -176,6 +186,13 @@ static int is_present_pte(unsigned long pte)
return pte & PT_PRESENT_MASK;
 }
 
+static int is_shadow_present_pte(u64 pte)
+{
+   pte &= ~PT_SHADOW_IO_MARK;
+   return pte != shadow_trap_nonpresent_pte
+   && pte != shadow_notrap_nonpresent_pte;
+}
+
 static int is_writeble_pte(unsigned long pte)
 {
return pte & PT_WRITABLE_MASK;
@@ -450,7 +467,7 @@ static int is_empty_shadow_page(u64 *spt)
u64 *end;
 
for (pos = spt, end = pos + PAGE_SIZE / sizeof(u64); pos != end; pos++)
-   if (*pos != 0) {
+   if ((*pos & ~PT_SHADOW_IO_MARK) != shadow_trap_nonpresent_pte) {
printk(KERN_ERR "%s: %p %llx\n", __FUNCTION__,
   pos, *pos);
return 0;
@@ -632,6 +649,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct 
kvm_vcpu *vcpu,
page->gfn = gfn;
page->role = role;
hlist_add_head(&page->hash_link, bucket);
+   vcpu->mmu.prefetch_page(vcpu, page);
if (!metaphysical)
rmap_write_protect(vcpu, gfn);
return page;
@@ -648,9 +666,9 @@ static void kvm_mmu_page_unlink_children(struct kvm *kvm,
 
if (page->role.level == PT_PAGE_TABLE_LEVEL) {
for (i = 0; i < PT64_ENT

[kvm-devel] [PATCH 08/50] KVM: VMX: Further reduce efer reloads

2007-12-23 Thread Avi Kivity
KVM avoids reloading the efer msr when the difference between the guest
and host values consist of the long mode bits (which are switched by
hardware) and the NX bit (which is emulated by the KVM MMU).

This patch also allows KVM to ignore SCE (syscall enable) when the guest
is running in 32-bit mode.  This is because the syscall instruction is
not available in 32-bit mode on Intel processors, so the SCE bit is
effectively meaningless.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/vmx.c |   61 
 1 files changed, 37 insertions(+), 24 deletions(-)

diff --git a/drivers/kvm/vmx.c b/drivers/kvm/vmx.c
index dcc0a84..f0f27a7 100644
--- a/drivers/kvm/vmx.c
+++ b/drivers/kvm/vmx.c
@@ -57,6 +57,7 @@ struct vcpu_vmx {
u16   fs_sel, gs_sel, ldt_sel;
int   gs_ldt_reload_needed;
int   fs_reload_needed;
+   int   guest_efer_loaded;
}host_state;
 
 };
@@ -74,8 +75,6 @@ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 static struct page *vmx_io_bitmap_a;
 static struct page *vmx_io_bitmap_b;
 
-#define EFER_SAVE_RESTORE_BITS ((u64)EFER_SCE)
-
 static struct vmcs_config {
int size;
int order;
@@ -138,18 +137,6 @@ static void save_msrs(struct kvm_msr_entry *e, int n)
rdmsrl(e[i].index, e[i].data);
 }
 
-static inline u64 msr_efer_save_restore_bits(struct kvm_msr_entry msr)
-{
-   return (u64)msr.data & EFER_SAVE_RESTORE_BITS;
-}
-
-static inline int msr_efer_need_save_restore(struct vcpu_vmx *vmx)
-{
-   int efer_offset = vmx->msr_offset_efer;
-   return msr_efer_save_restore_bits(vmx->host_msrs[efer_offset]) !=
-   msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
-}
-
 static inline int is_page_fault(u32 intr_info)
 {
return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VECTOR_MASK |
@@ -351,16 +338,42 @@ static void reload_tss(void)
 
 static void load_transition_efer(struct vcpu_vmx *vmx)
 {
-   u64 trans_efer;
int efer_offset = vmx->msr_offset_efer;
+   u64 host_efer = vmx->host_msrs[efer_offset].data;
+   u64 guest_efer = vmx->guest_msrs[efer_offset].data;
+   u64 ignore_bits;
+
+   if (efer_offset < 0)
+   return;
+   /*
+* NX is emulated; LMA and LME handled by hardware; SCE meaninless
+* outside long mode
+*/
+   ignore_bits = EFER_NX | EFER_SCE;
+#ifdef CONFIG_X86_64
+   ignore_bits |= EFER_LMA | EFER_LME;
+   /* SCE is meaningful only in long mode on Intel */
+   if (guest_efer & EFER_LMA)
+   ignore_bits &= ~(u64)EFER_SCE;
+#endif
+   if ((guest_efer & ~ignore_bits) == (host_efer & ~ignore_bits))
+   return;
 
-   trans_efer = vmx->host_msrs[efer_offset].data;
-   trans_efer &= ~EFER_SAVE_RESTORE_BITS;
-   trans_efer |= msr_efer_save_restore_bits(vmx->guest_msrs[efer_offset]);
-   wrmsrl(MSR_EFER, trans_efer);
+   vmx->host_state.guest_efer_loaded = 1;
+   guest_efer &= ~ignore_bits;
+   guest_efer |= host_efer & ignore_bits;
+   wrmsrl(MSR_EFER, guest_efer);
vmx->vcpu.stat.efer_reload++;
 }
 
+static void reload_host_efer(struct vcpu_vmx *vmx)
+{
+   if (vmx->host_state.guest_efer_loaded) {
+   vmx->host_state.guest_efer_loaded = 0;
+   load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
+   }
+}
+
 static void vmx_save_host_state(struct kvm_vcpu *vcpu)
 {
struct vcpu_vmx *vmx = to_vmx(vcpu);
@@ -406,8 +419,7 @@ static void vmx_save_host_state(struct kvm_vcpu *vcpu)
}
 #endif
load_msrs(vmx->guest_msrs, vmx->save_nmsrs);
-   if (msr_efer_need_save_restore(vmx))
-   load_transition_efer(vmx);
+   load_transition_efer(vmx);
 }
 
 static void vmx_load_host_state(struct vcpu_vmx *vmx)
@@ -436,8 +448,7 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx)
reload_tss();
save_msrs(vmx->guest_msrs, vmx->save_nmsrs);
load_msrs(vmx->host_msrs, vmx->save_nmsrs);
-   if (msr_efer_need_save_restore(vmx))
-   load_msrs(vmx->host_msrs + vmx->msr_offset_efer, 1);
+   reload_host_efer(vmx);
 }
 
 /*
@@ -727,8 +738,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, u32 
msr_index, u64 data)
 #ifdef CONFIG_X86_64
case MSR_EFER:
ret = kvm_set_msr_common(vcpu, msr_index, data);
-   if (vmx->host_state.loaded)
+   if (vmx->host_state.loaded) {
+   reload_host_efer(vmx);
load_transition_efer(vmx);
+   }
break;
case MSR_FS_BASE:
vmcs_writel(GUEST_FS_BASE, data);
-- 
1.5.3.7


-
This SF.n

[kvm-devel] [PATCH 07/50] KVM: Call x86_decode_insn() only when needed

2007-12-23 Thread Avi Kivity
From: Laurent Vivier <[EMAIL PROTECTED]>

Move emulate_ctxt to kvm_vcpu to keep emulate context when we exit from kvm
module. Call x86_decode_insn() only when needed. Modify x86_emulate_insn() to
not modify the context if it must be re-entered.

Signed-off-by: Laurent Vivier <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |8 -
 drivers/kvm/kvm_main.c|   77 +---
 drivers/kvm/svm.c |9 +++--
 drivers/kvm/vmx.c |9 +++--
 drivers/kvm/x86_emulate.c |   24 --
 5 files changed, 82 insertions(+), 45 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index da9c3aa..e885b19 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -207,6 +207,8 @@ enum {
VCPU_SREG_LDTR,
 };
 
+#include "x86_emulate.h"
+
 struct kvm_pio_request {
unsigned long count;
int cur_count;
@@ -380,6 +382,10 @@ struct kvm_vcpu {
 
int cpuid_nent;
struct kvm_cpuid_entry cpuid_entries[KVM_MAX_CPUID_ENTRIES];
+
+   /* emulate context */
+
+   struct x86_emulate_ctxt emulate_ctxt;
 };
 
 struct kvm_mem_alias {
@@ -555,7 +561,7 @@ enum emulation_result {
 };
 
 int emulate_instruction(struct kvm_vcpu *vcpu, struct kvm_run *run,
-   unsigned long cr2, u16 error_code);
+   unsigned long cr2, u16 error_code, int no_decode);
 void kvm_report_emulation_failure(struct kvm_vcpu *cvpu, const char *context);
 void realmode_lgdt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
 void realmode_lidt(struct kvm_vcpu *vcpu, u16 size, unsigned long address);
diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index 39c54d5..fad3a08 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -1251,45 +1251,56 @@ struct x86_emulate_ops emulate_ops = {
 int emulate_instruction(struct kvm_vcpu *vcpu,
struct kvm_run *run,
unsigned long cr2,
-   u16 error_code)
+   u16 error_code,
+   int no_decode)
 {
-   struct x86_emulate_ctxt emulate_ctxt;
-   int r;
-   int cs_db, cs_l;
+   int r = 0;
 
vcpu->mmio_fault_cr2 = cr2;
kvm_x86_ops->cache_regs(vcpu);
 
-   kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
-
-   emulate_ctxt.vcpu = vcpu;
-   emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
-   emulate_ctxt.cr2 = cr2;
-   emulate_ctxt.mode = (emulate_ctxt.eflags & X86_EFLAGS_VM)
-   ? X86EMUL_MODE_REAL : cs_l
-   ? X86EMUL_MODE_PROT64 : cs_db
-   ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
-
-   if (emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
-   emulate_ctxt.cs_base = 0;
-   emulate_ctxt.ds_base = 0;
-   emulate_ctxt.es_base = 0;
-   emulate_ctxt.ss_base = 0;
-   } else {
-   emulate_ctxt.cs_base = get_segment_base(vcpu, VCPU_SREG_CS);
-   emulate_ctxt.ds_base = get_segment_base(vcpu, VCPU_SREG_DS);
-   emulate_ctxt.es_base = get_segment_base(vcpu, VCPU_SREG_ES);
-   emulate_ctxt.ss_base = get_segment_base(vcpu, VCPU_SREG_SS);
-   }
-
-   emulate_ctxt.gs_base = get_segment_base(vcpu, VCPU_SREG_GS);
-   emulate_ctxt.fs_base = get_segment_base(vcpu, VCPU_SREG_FS);
-
vcpu->mmio_is_write = 0;
vcpu->pio.string = 0;
-   r = x86_decode_insn(&emulate_ctxt, &emulate_ops);
+
+   if (!no_decode) {
+   int cs_db, cs_l;
+   kvm_x86_ops->get_cs_db_l_bits(vcpu, &cs_db, &cs_l);
+
+   vcpu->emulate_ctxt.vcpu = vcpu;
+   vcpu->emulate_ctxt.eflags = kvm_x86_ops->get_rflags(vcpu);
+   vcpu->emulate_ctxt.cr2 = cr2;
+   vcpu->emulate_ctxt.mode =
+   (vcpu->emulate_ctxt.eflags & X86_EFLAGS_VM)
+   ? X86EMUL_MODE_REAL : cs_l
+   ? X86EMUL_MODE_PROT64 : cs_db
+   ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16;
+
+   if (vcpu->emulate_ctxt.mode == X86EMUL_MODE_PROT64) {
+   vcpu->emulate_ctxt.cs_base = 0;
+   vcpu->emulate_ctxt.ds_base = 0;
+   vcpu->emulate_ctxt.es_base = 0;
+   vcpu->emulate_ctxt.ss_base = 0;
+   } else {
+   vcpu->emulate_ctxt.cs_base =
+   get_segment_base(vcpu, VCPU_SREG_CS);
+   vcpu->emulate_ctxt.ds_base =
+   get_segment_base(vcpu, VCPU_SREG_DS);
+   vcpu->emulate_ctxt.es_base =
+   get_segment_base(vcpu, VCPU_SREG_ES);
+

[kvm-devel] [PATCH 10/50] KVM: MMU: Make flooding detection work when guest page faults are bypassed

2007-12-23 Thread Avi Kivity
When we allow guest page faults to reach the guests directly, we lose
the fault tracking which allows us to detect demand paging.  So we provide
an alternate mechnism by clearing the accessed bit when we set a pte, and
checking it later to see if the guest actually used it.

Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm.h |1 +
 drivers/kvm/mmu.c |   21 -
 drivers/kvm/paging_tmpl.h |9 -
 3 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 7de948e..08ffc82 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -346,6 +346,7 @@ struct kvm_vcpu {
 
gfn_t last_pt_write_gfn;
int   last_pt_write_count;
+   u64  *last_pte_updated;
 
struct kvm_guest_debug guest_debug;
 
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 069ce83..d347e89 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -692,6 +692,15 @@ static void kvm_mmu_put_page(struct kvm_mmu_page *page,
mmu_page_remove_parent_pte(page, parent_pte);
 }
 
+static void kvm_mmu_reset_last_pte_updated(struct kvm *kvm)
+{
+   int i;
+
+   for (i = 0; i < KVM_MAX_VCPUS; ++i)
+   if (kvm->vcpus[i])
+   kvm->vcpus[i]->last_pte_updated = NULL;
+}
+
 static void kvm_mmu_zap_page(struct kvm *kvm,
 struct kvm_mmu_page *page)
 {
@@ -717,6 +726,7 @@ static void kvm_mmu_zap_page(struct kvm *kvm,
kvm_mmu_free_page(kvm, page);
} else
list_move(&page->link, &kvm->active_mmu_pages);
+   kvm_mmu_reset_last_pte_updated(kvm);
 }
 
 static int kvm_mmu_unprotect_page(struct kvm_vcpu *vcpu, gfn_t gfn)
@@ -1140,6 +1150,13 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu,
offset_in_pte);
 }
 
+static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
+{
+   u64 *spte = vcpu->last_pte_updated;
+
+   return !!(spte && (*spte & PT_ACCESSED_MASK));
+}
+
 void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
   const u8 *new, int bytes)
 {
@@ -1160,13 +1177,15 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
 
pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
kvm_mmu_audit(vcpu, "pre pte write");
-   if (gfn == vcpu->last_pt_write_gfn) {
+   if (gfn == vcpu->last_pt_write_gfn
+   && !last_updated_pte_accessed(vcpu)) {
++vcpu->last_pt_write_count;
if (vcpu->last_pt_write_count >= 3)
flooded = 1;
} else {
vcpu->last_pt_write_gfn = gfn;
vcpu->last_pt_write_count = 1;
+   vcpu->last_pte_updated = NULL;
}
index = kvm_page_table_hashfn(gfn) % KVM_NUM_MMU_PAGES;
bucket = &vcpu->kvm->mmu_page_hash[index];
diff --git a/drivers/kvm/paging_tmpl.h b/drivers/kvm/paging_tmpl.h
index 99ac9b1..be0f852 100644
--- a/drivers/kvm/paging_tmpl.h
+++ b/drivers/kvm/paging_tmpl.h
@@ -238,7 +238,12 @@ static void FNAME(set_pte_common)(struct kvm_vcpu *vcpu,
FNAME(mark_pagetable_dirty)(vcpu->kvm, walker);
}
 
-   spte = PT_PRESENT_MASK | PT_ACCESSED_MASK | PT_DIRTY_MASK;
+   /*
+* We don't set the accessed bit, since we sometimes want to see
+* whether the guest actually used the pte (in order to detect
+* demand paging).
+*/
+   spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
spte |= gpte & PT64_NX_MASK;
if (!dirty)
access_bits &= ~PT_WRITABLE_MASK;
@@ -291,6 +296,8 @@ unshadowed:
page_header_update_slot(vcpu->kvm, shadow_pte, gaddr);
if (!was_rmapped)
rmap_add(vcpu, shadow_pte);
+   if (!ptwrite || !*ptwrite)
+   vcpu->last_pte_updated = shadow_pte;
 }
 
 static void FNAME(set_pte)(struct kvm_vcpu *vcpu, pt_element_t gpte,
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 11/50] KVM: MMU: Ignore reserved bits in cr3 in non-pae mode

2007-12-23 Thread Avi Kivity
From: Ryan Harper <[EMAIL PROTECTED]>

This patch removes the fault injected when the guest attempts to set reserved
bits in cr3.  X86 hardware doesn't generate a fault when setting reserved bits.
The result of this patch is that vmware-server, running within a kvm guest,
boots and runs memtest from an iso.

Signed-off-by: Ryan Harper <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 drivers/kvm/kvm_main.c |   11 ---
 1 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/kvm/kvm_main.c b/drivers/kvm/kvm_main.c
index da057cf..b10fd7e 100644
--- a/drivers/kvm/kvm_main.c
+++ b/drivers/kvm/kvm_main.c
@@ -554,14 +554,11 @@ void set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
inject_gp(vcpu);
return;
}
-   } else {
-   if (cr3 & CR3_NONPAE_RESERVED_BITS) {
-   printk(KERN_DEBUG
-  "set_cr3: #GP, reserved bits\n");
-   inject_gp(vcpu);
-   return;
-   }
}
+   /*
+* We don't check reserved bits in nonpae mode, because
+* this isn't enforced, and VMware depends on this.
+*/
}
 
mutex_lock(&vcpu->kvm->lock);
-- 
1.5.3.7


-
This SF.net email is sponsored by: Microsoft
Defy all challenges. Microsoft(R) Visual Studio 2005.
http://clk.atdmt.com/MRT/go/vse012070mrt/direct/01/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


<    10   11   12   13   14   15   16   17   18   19   >