Re: [kvm-devel] guest PTE write emulation

2007-05-30 Thread Dong, Eddie
Avi Kivity wrote:
> On a pte update, npte will always be 1.  On a pde update, we won't do
> anything in mmu_pte_write_new_pte because it doesn't handle
> pdes.  If we
> extend it to handle pdes, then we need either to modify the
> new gpde or
> to have the update take the quadrant into account.

Agreed, so we either just skip the npte=2 branch or polish it.
How about the following changes?

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c85c664..37a7dc5 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1156,7 +1156,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
struct hlist_node *node, *n;
struct hlist_head *bucket;
unsigned index;
-   u64 *spte;
+   u64 *spte, gpte[2];
unsigned offset = offset_in_page(gpa);
unsigned pte_size;
unsigned page_offset;
@@ -1164,7 +1164,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
unsigned quadrant;
int level;
int flooded = 0;
-   int npte;
+   int npte, i;
 
pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes);
if (gfn == vcpu->last_pt_write_gfn) {
@@ -1202,6 +1202,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
page_offset = offset;
level = page->role.level;
npte = 1;
+   gpte[0] = *(long*)new;
if (page->role.glevels == PT32_ROOT_LEVEL) {
page_offset <<= 1;  /* 32->64 */
/*
@@ -1209,10 +1210,16 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
 * only 2MB.  So we need to double the offset
again
 * and zap two pdes instead of one.
 */
-   if (level == PT32_ROOT_LEVEL) {
+   if ((level == PT_DIRECTORY_LEVEL) &&
+   (vcpu->cr4 & CR4_PSE_MASK) &&
+   (gpte[0] & PT_PAGE_SIZE_MASK)) {
page_offset &= ~7; /* kill rounding
error */
page_offset <<= 1;
npte = 2;
+   gpte[1] = gpte[0];
+   gpte[1] += 1 << 21;
}
quadrant = page_offset >> PAGE_SHIFT;
page_offset &= ~PAGE_MASK;
@@ -1220,9 +1227,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
continue;
}
spte = &page->spt[page_offset / sizeof(*spte)];
-   while (npte--) {
+   for (i=0; i < npte; i++) {
mmu_pte_write_zap_pte(vcpu, page, spte);
-   mmu_pte_write_new_pte(vcpu, page, spte, new,
bytes);
+   mmu_pte_write_new_pte(vcpu, page, spte,
&gpte[i], bytes);
++spte;
}
}

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] guest PTE write emulation

2007-05-30 Thread Avi Kivity
Dong, Eddie wrote:
> Avi Kivity wrote:
>   
>> On a pte update, npte will always be 1.  On a pde update, we won't do
>> anything in mmu_pte_write_new_pte because it doesn't handle
>> pdes.  If we
>> extend it to handle pdes, then we need either to modify the
>> new gpde or
>> to have the update take the quadrant into account.
>> 
>
> Agree, so we either just skip npte=2 branch or polish it.
> How about following changes?  
>   

Isn't it all dead code, as gpte[1] will never be used?

Maybe just move the page->role.level test from mmu_pte_write_new_pte() 
back to kvm_mmu_pte_write(), so that it's clear that the new data isn't 
used for the non-pte case?


-- 
error compiling committee.c: too many arguments to function


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] guest PTE write emulation

2007-05-30 Thread Dong, Eddie
Avi Kivity wrote:
> Dong, Eddie wrote:
>> Avi Kivity wrote:
>> 
>>> On a pte update, npte will always be 1.  On a pde update, we won't
>>> do anything in mmu_pte_write_new_pte because it doesn't handle
>>> pdes.  If we extend it to handle pdes, then we need either to
>>> modify the new gpde or to have the update take the quadrant into
>>> account. 
>>> 
>> 
>> Agree, so we either just skip npte=2 branch or polish it. How about
>> following changes? 
>> 
> 
> Isn't it all dead code, as gpte[1] will never be used?

If we are removing the dead code, we can do it as follows:

diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index c85c664..7d7cd42 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -1209,11 +1209,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu,
gpa_t gpa,
 * only 2MB.  So we need to double the offset
again
 * and zap two pdes instead of one.
 */
-   if (level == PT32_ROOT_LEVEL) {
-   page_offset &= ~7; /* kill rounding
error */
-   page_offset <<= 1;
-   npte = 2;
-   }
quadrant = page_offset >> PAGE_SHIFT;
page_offset &= ~PAGE_MASK;
if (quadrant != page->role.quadrant)


> 
> Maybe just move the page->role.level test from mmu_pte_write_new_pte()

The role test needs to be combined with the guest CR4 and PS bits.
The current code finds a fake 4M page about 3.5K times.

> back to kvm_mmu_pte_write(), so that it's clear that the new
> data isn't
> used for the non-pte case?

Agree.

Eddie

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 0/3] KVM paravirt_ops implementation

2007-05-30 Thread Anthony Liguori
This is the start of a paravirt_ops implementation for KVM.  Most of it 
was done by Ingo Molnar, I just moved things around a bit.  I don't 
think there's a measurable performance benefit just yet but there are a 
few more optimizations that I think we can get in time for 2.6.23 that 
will be measurable.

Regards,

Anthony Liguori

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Anthony Liguori

Regards,

Anthony Liguori
Subject: [PATCH] Add KVM paravirt_ops implementation
From: Anthony Liguori <[EMAIL PROTECTED]>

This patch adds the basic infrastructure for paravirtualizing a KVM guest.
Discovery of running under KVM is done by sharing a page of memory between
the guest and host (initially through an MSR write).

This is based on a patch written by Ingo Molnar.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

Index: kvm/arch/i386/Kconfig
===
--- kvm.orig/arch/i386/Kconfig	2007-05-30 08:42:31.0 -0500
+++ kvm/arch/i386/Kconfig	2007-05-30 08:42:38.0 -0500
@@ -231,6 +231,13 @@
 	  at the moment), by linking the kernel to a GPL-ed ROM module
 	  provided by the hypervisor.
 
+config KVM_GUEST
+	bool "KVM paravirt-ops support"
+	depends on PARAVIRT
+	help
+	  This option enables various optimizations for running under the KVM
+	  hypervisor.
+
 config ACPI_SRAT
 	bool
 	default y
Index: kvm/arch/i386/kernel/Makefile
===
--- kvm.orig/arch/i386/kernel/Makefile	2007-05-30 08:42:26.0 -0500
+++ kvm/arch/i386/kernel/Makefile	2007-05-30 08:42:38.0 -0500
@@ -41,6 +41,7 @@
 obj-$(CONFIG_K8_NB)		+= k8.o
 
 obj-$(CONFIG_VMI)		+= vmi.o vmiclock.o
+obj-$(CONFIG_KVM_GUEST)		+= kvm.o
 obj-$(CONFIG_PARAVIRT)		+= paravirt.o
 obj-y+= pcspeaker.o
 
Index: kvm/arch/i386/kernel/kvm.c
===
--- /dev/null	1970-01-01 00:00:00.0 +
+++ kvm/arch/i386/kernel/kvm.c	2007-05-30 09:29:37.0 -0500
@@ -0,0 +1,100 @@
+/*
+ * KVM paravirt_ops implementation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+ *
+ * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <[EMAIL PROTECTED]>
+ * Copyright IBM Corporation, 2007
+ *   Authors: Anthony Liguori <[EMAIL PROTECTED]>
+ */
+
+#include 
+#include 
+#include 
+#include 
+
+static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state);
+extern unsigned char hypercall_addr[4];
+
+static void kvm_guest_setup(void)
+{
+	paravirt_ops.name = "KVM";
+	paravirt_ops.paravirt_enabled = 1;
+}
+
+/*
+ * This is the vm-syscall address - to be patched by the host to
+ * VMCALL (Intel) or VMMCALL (AMD), depending on the CPU model:
+ */
+asm (
+	".globl hypercall_addr\n"
+	".align 4\n"
+	"hypercall_addr:\n"
+	"movl $-38, %eax\n"
+	"ret\n"
+);
+
+static int kvm_guest_register_para(int cpu)
+{
+	struct kvm_vcpu_para_state *para_state = &per_cpu(para_state, cpu);
+
+	printk(KERN_DEBUG "kvm guest on VCPU#%d: trying to register para_state %p\n",
+	   cpu, para_state);
+
+	/*
+	 * Try to write to a magic MSR (which is invalid on any real CPU),
+	 * and thus signal to KVM that we wish to enter paravirtualized
+	 * mode:
+	 */
+	para_state->guest_version = KVM_PARA_API_VERSION;
+	para_state->host_version = -1;
+	para_state->size = sizeof(*para_state);
+	para_state->ret = 0;
+	para_state->hypercall_gpa = __pa(hypercall_addr);
+
+	if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) {
+		printk(KERN_INFO "KVM guest: WRMSR probe failed.\n");
+		return -ENOENT;
+	}
+
+	printk(KERN_DEBUG "kvm guest: host returned %d\n", para_state->ret);
+	printk(KERN_DEBUG "kvm guest: host version: %d\n", para_state->host_version);
+	printk(KERN_DEBUG "kvm guest: syscall entry: %02x %02x %02x %02x\n",
+	   hypercall_addr[0], hypercall_addr[1],
+	   hypercall_addr[2], hypercall_addr[3]);
+
+	if (para_state->ret) {
+		printk(KERN_ERR "kvm guest: host refused registration.\n");
+		return para_state->ret;
+	}
+
+	return 0;
+}
+
+static int __init kvm_guest_init(void)
+{
+	int rc;
+
+	rc = kvm_guest_register_para(smp_processor_id());
+	if (rc) {
+		printk(KERN_INFO "paravirt KVM unavailable\n");
+		goto out;
+	}
+
+	kvm_guest_setup();
+ out:
+	return 0;
+}
+core_initcall(kvm_guest_init);
-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists

[kvm-devel] [PATCH 2/3][PARAVIRT] Make IO delay a NOP

2007-05-30 Thread Anthony Liguori

Regards,

Anthony Liguori
Subject: [PATCH][PARAVIRT] Make IO delay a NOP for paravirt guests

No delay is required in between PIO operations under KVM guests so make IO
delay a NOP.  This was originally part of Ingo Molnar's paravirt series.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

Index: kvm/arch/i386/kernel/kvm.c
===
--- kvm.orig/arch/i386/kernel/kvm.c	2007-05-30 09:30:42.0 -0500
+++ kvm/arch/i386/kernel/kvm.c	2007-05-30 09:31:46.0 -0500
@@ -28,9 +28,17 @@
 static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state);
 extern unsigned char hypercall_addr[4];
 
+/*
+ * No need for any "IO delay" on KVM
+ */
+static void kvm_io_delay(void)
+{
+}
+
 static void kvm_guest_setup(void)
 {
 	paravirt_ops.name = "KVM";
+	paravirt_ops.io_delay = kvm_io_delay;
 	paravirt_ops.paravirt_enabled = 1;
 }
 
-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Anthony Liguori

Regards,

Anthony Liguori
Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush

This patch eliminates the CR3 read (which would cause a VM exit) in the TLB
flush path.  The patch is based on Ingo Molnar's paravirt series.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

Index: kvm/arch/i386/kernel/kvm.c
===
--- kvm.orig/arch/i386/kernel/kvm.c	2007-05-30 09:13:48.0 -0500
+++ kvm/arch/i386/kernel/kvm.c	2007-05-30 09:14:24.0 -0500
@@ -24,11 +24,35 @@
 #include 
 #include 
 #include 
+#include 
+#include 
+#include 
 
 static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state);
 extern unsigned char hypercall_addr[4];
 
 /*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ */
+static void kvm_flush_tlb_user(void)
+{
+	write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Avoid VM exit for cr3 read by calling into kvm_flush_tlb_user
+ */
+static fastcall void kvm_flush_tlb_kernel(void)
+{
+	unsigned long orig_cr4 = read_cr4();
+
+	write_cr4(orig_cr4 & ~X86_CR4_PGE);
+	kvm_flush_tlb_user();
+	write_cr4(orig_cr4);
+}
+
+/*
  * No need for any "IO delay" on KVM
  */
 static void kvm_io_delay(void)
@@ -38,6 +62,8 @@
 static void kvm_guest_setup(void)
 {
 	paravirt_ops.name = "KVM";
+	paravirt_ops.flush_tlb_user = kvm_flush_tlb_user;
+	paravirt_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
 	paravirt_ops.io_delay = kvm_io_delay;
 	paravirt_ops.paravirt_enabled = 1;
 }
-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Andi Kleen
On Wednesday 30 May 2007 16:53:41 Anthony Liguori wrote:
> Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush
> 
> This patch eliminates the CR3 read (which would cause a VM exit) in the TLB
> flush path.  The patch is based on Ingo Molnar's paravirt series.
> 

This change could just be done generically for the native architecture,
couldn't it?

-Andi

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] Hypercall numbering and the -rt tree

2007-05-30 Thread Anthony Liguori
Hi Ingo,

In my last series, I avoided submitting any patches that used 
hypercalls.  Part of the reason is that I know that your tree already 
has assigned hypercalls out to various things.  Some make sense (like 
flush_cr3_cache) but some are, at least, contentious (like the pv disk 
hypercalls).

I'd like to start using hypercalls but this means squashing over some of 
the hypercalls that are defined in the -rt tree.  Do you have a problem 
with this?  Is this going to cause major breakages for anyone?

Regards,

Anthony Liguori

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Anthony Liguori

Andi Kleen wrote:

On Wednesday 30 May 2007 16:53:41 Anthony Liguori wrote:
  

Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush

This patch eliminates the CR3 read (which would cause a VM exit) in the TLB
flush path.  The patch is based on Ingo Molnar's paravirt series.




This change could be just done generically for the native architecture, couldn't 
it?
  


Sure.  It adds a few more cycles onto native though (two memory reads, 
and some math).  How does the following look?


Regards,

Anthony Liguori


-Andi
  


Subject: [PATCH] Avoid reading CR3 on TLB flush
From: Anthony Liguori <[EMAIL PROTECTED]>

In a virtualized environment, there is significant overhead in reading and
writing control registers.  Since we already have the value of CR3 in
current->active_mm->pgd, we can avoid taking the exit on CR3 read when
doing a TLB flush.

Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>

Index: kvm/include/asm-i386/tlbflush.h
===
--- kvm.orig/include/asm-i386/tlbflush.h	2007-05-30 10:22:48.0 -0500
+++ kvm/include/asm-i386/tlbflush.h	2007-05-30 10:22:54.0 -0500
@@ -14,13 +14,12 @@
 
 #define __native_flush_tlb()		\
 	do {\
-		unsigned int tmpreg;	\
+		unsigned int cr3 = __pa(current->active_mm->pgd);	\
 	\
 		__asm__ __volatile__(	\
-			"movl %%cr3, %0;  \n"		\
 			"movl %0, %%cr3;  # flush TLB \n"		\
-			: "=r" (tmpreg)	\
-			:: "memory");	\
+			:: "r" (cr3)   	\
+			: "memory");	\
 	} while (0)
 
 /*
@@ -29,18 +28,18 @@
  */
 #define __native_flush_tlb_global()	\
 	do {\
-		unsigned int tmpreg, cr4, cr4_orig;			\
+		unsigned int cr3 = __pa(current->active_mm->pgd);	\
+		unsigned int cr4, cr4_orig;\
 	\
 		__asm__ __volatile__(	\
-			"movl %%cr4, %2;  # turn off PGE \n"	\
-			"movl %2, %1;\n"	\
-			"andl %3, %1;\n"	\
-			"movl %1, %%cr4; \n"	\
-			"movl %%cr3, %0; \n"	\
-			"movl %0, %%cr3;  # flush TLB\n"	\
-			"movl %2, %%cr4;  # turn PGE back on \n"	\
-			: "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig)	\
-			: "i" (~X86_CR4_PGE)\
+			"movl %%cr4, %1;  # turn off PGE \n"	\
+			"movl %1, %0;\n"	\
+			"andl %2, %0;\n"	\
+			"movl %0, %%cr4; \n"	\
+			"movl %3, %%cr3;  # flush TLB\n"	\
+			"movl %1, %%cr4;  # turn PGE back on \n"	\
+			: "=&r" (cr4), "=&r" (cr4_orig)			\
+			: "i" (~X86_CR4_PGE), "r" (cr3)			\
 			: "memory");	\
 	} while (0)
 
-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Jeremy Fitzhardinge
Anthony Liguori wrote:
> Sure.  It adds a few more cycles onto native though (two memory reads,
> and some math).

As opposed to a serializing control-register read?  I think that's
probably a win.

J

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] Could not start KVM 24

2007-05-30 Thread ymed
>>> >I have used KVM 16 in x86_64 OpenSUSE 10.2 without problems.
>>
>>> >I have recently downloaded and installed x86_64 RPMs of KVM 24.
>>
>>> >But I could not start it:
>>
>>> >
>>
>>> >#qemu-kvm
>>
>>> >open /dev/kvm: No such file or directory
>>
>>> >Could not initialize KVM, will disable KVM support
>>
>>> >
>>
>>> >I have Intel VT enabled in my PC and
>>
>>> >MODULES_LOADED_ON_BOOT="kvm kvm-intel"
>>
>>> >
>>
>>> Please make sure the modules are really loaded (/sbin/lsmod|grep kvm)
>>
>>> Check if you see /dev/kvm and its permissions.
>>
>>
>>
>>The result of the command:
>>
>>/sbin/lsmod|grep kvm
>>
>>was:
>>
>>KVM   866160
>>I did not find any /dev/kvm file...
>
>You don't have kvm_intel module loaded.
>If it fails loading please check dmesg.

Thank you Dor!
This is output of dmesg below. I could not understand why KVM 16 worked 
fine but KVM 25 or 26 does not work in OpenSUSE. Unfortunately, it's not 
possible to find KVM 16 because this version was replaced.

linux-cyu9:~ # dmesg
Bootdata ok (command line is root=/dev/sda3 vga=0x314resume=/dev/sda2 
splash=silent)
Linux version 2.6.18.2-34-default ([EMAIL PROTECTED]) (gcc version 4.1.2 
20061115 (prerelease) (SUSE Linux)) #1 SMP Mon Nov 27 11:46:27 UTC 2006
BIOS-provided physical RAM map:
 BIOS-e820:  - 0008f000 (usable)
 BIOS-e820: 0008f000 - 000a (reserved)
 BIOS-e820: 000e - 0010 (reserved)
 BIOS-e820: 0010 - 7e599000 (usable)
 BIOS-e820: 7e599000 - 7e5a6000 (reserved)
 BIOS-e820: 7e5a6000 - 7e656000 (usable)
 BIOS-e820: 7e656000 - 7e6a7000 (ACPI NVS)
 BIOS-e820: 7e6a7000 - 7e6ac000 (ACPI data)
 BIOS-e820: 7e6ac000 - 7e6f2000 (ACPI NVS)
 BIOS-e820: 7e6f2000 - 7e6f3000 (usable)
 BIOS-e820: 7e6f3000 - 7e6ff000 (ACPI data)
 BIOS-e820: 7e6ff000 - 7e70 (usable)
 BIOS-e820: 7e70 - 7f00 (reserved)
 BIOS-e820: fff0 - 0001 (reserved)
DMI 2.4 present.
ACPI: RSDP (v000 INTEL ) @ 0x000fe020
ACPI: RSDT (v001 INTEL  DG965WH  0x0629  0x0113) @ 
0x7e6fd038
ACPI: FADT (v001 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x7e6fc000
ACPI: MADT (v001 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x7e6f6000
ACPI: WDDT (v001 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x7e6f5000
ACPI: MCFG (v001 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x7e6f4000
ACPI: ASF! (v032 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x7e6f3000
ACPI: SSDT (v001 INTEL CpuPm 0x0629 MSFT 0x0113) @ 
0x7e6ab000
ACPI: SSDT (v001 INTEL   Cpu0Ist 0x0629 MSFT 0x0113) @ 
0x7e6aa000
ACPI: SSDT (v001 INTEL   Cpu1Ist 0x0629 MSFT 0x0113) @ 
0x7e6a9000
ACPI: SSDT (v001 INTEL   Cpu2Ist 0x0629 MSFT 0x0113) @ 
0x7e6a8000
ACPI: SSDT (v001 INTEL   Cpu3Ist 0x0629 MSFT 0x0113) @ 
0x7e6a7000
ACPI: DSDT (v001 INTEL  DG965WH  0x0629 MSFT 0x0113) @ 
0x
No NUMA configuration found
Faking a node at -7e70
Bootmem setup node 0 -7e70
On node 0 totalpages: 509464
  DMA zone: 2877 pages, LIFO batch:0
  DMA32 zone: 506587 pages, LIFO batch:31
ACPI: PM-Timer IO Port: 0x408
ACPI: Local APIC address 0xfee0
ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled)
Processor #0 6:15 APIC version 20
ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled)
Processor #1 6:15 APIC version 20
ACPI: LAPIC (acpi_id[0x03] lapic_id[0x82] disabled)
ACPI: LAPIC (acpi_id[0x04] lapic_id[0x83] disabled)
ACPI: LAPIC_NMI (acpi_id[0x01] dfl dfl lint[0x1])
ACPI: LAPIC_NMI (acpi_id[0x02] dfl dfl lint[0x1])
ACPI: IOAPIC (id[0x02] address[0xfec0] gsi_base[0])
IOAPIC[0]: apic_id 2, version 32, address 0xfec0, GSI 0-23
ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl)
ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level)
ACPI: IRQ0 used by override.
ACPI: IRQ2 used by override.
ACPI: IRQ9 used by override.
Setting APIC routing to physical flat
Using ACPI (MADT) for SMP configuration information
Allocating PCI resources starting at 8000 (gap: 7f00:80f0)
SMP: Allowing 4 CPUs, 2 hotplug CPUs
Built 1 zonelists.  Total pages: 509464
Kernel command line: root=/dev/sda3 vga=0x314resume=/dev/sda2 splash=silent
bootsplash: silent mode.
Initializing CPU#0
PID hash table entries: 4096 (order: 12, 32768 bytes)
time.c: Using 3.579545 MHz WALL PM GTOD PIT/TSC timer.
time.c: Detected 2131.258 MHz processor.
Console: colour dummy device 80x25
Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes)
Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes)
Checking aperture...
Memory: 2031144k/2071552k available (1915k kernel code, 39232k reserved, 1278k 
data, 188k init)
Calibratin

Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Nakajima, Jun
Anthony Liguori wrote:
> Regards,
> 
> Anthony Liguori

I think we should use the CPUID instruction (leaf 0x40000000) to detect
the hypervisor as we are doing in Xen.

Jun
---
Intel Open Source Technology Center

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Nakajima, Jun
Jeremy Fitzhardinge wrote:
> Anthony Liguori wrote:
> > Sure.  It adds a few more cycles onto native though (two memory
reads,
> > and some math).
> 
> As opposed to a serializing control-register read?  I think that's
> probably a win.
> 
> J
> 

And actually you don't need the write to CR3 to flush TLB because the
one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is
updated at the same time?

Jun
---
Intel Open Source Technology Center

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] State of current pv_ops backend for KVM?

2007-05-30 Thread Zachary Amsden
Anthony Liguori wrote:
> Rusty Russell wrote:
>> On Mon, 2007-05-28 at 20:54 -0500, Anthony Liguori wrote:
>>  
>>> Howdy,
>>>
>>> Does anyone know what the state of a pv_ops backend for KVM is?  I 
>>> know Ingo has an implementation that implements CR3 caching but I 
>>> don't see any branches in Avi's git tree.
>>>
>>> Perhaps we should try for a simple pv_ops backend for 2.6.23 seeing 
>>> as how the host infrastructure is there?  I'd be willing to do some 
>>> leg work here...
>>> 
>>
>> "Me too".  It's at least worth toying with the mmu batching stuff to see
>> if that can shave some cycles there: it seems that cr3 caching is the
>> main win for context switch so batching there won't help.
>>   
>
> I'm working on a simple patch that just adds the infrastructure.
>
>> IIRC Ingo used a magic MSR to detect kvm, and there was a question mark
>> over that.
>
> I am not too worried about using an MSR.  There's really no fool proof 
> method here.  The only thing to do IMHO is move the kvm pv_ops backend 
> into a separate file and add appropriate Kconfig stuff.
>
>>   VMI uses a different technique, which is probably worth
>> considering...
>>   
>
> VMI can determine that it's under VMware since VMware has to publish 
> an option ROM.  It's quite neat, but overkill for what we're doing 
> (since we're not going to be using a ROM anyway).

The near-to-being-blessed way to do this is to use a CPUID leaf (I
believe it is 0x40000000) to identify the hypervisor.  I don't have the
exact details, but making CPUID 0x40000000 return 'KVMKVMKVMKVM' sounds
like a pretty safe detection technique.

We're probably dropping the ROM probing in favor of this for 64-bit.

Zach

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Anthony Liguori
Nakajima, Jun wrote:
> Anthony Liguori wrote:
>   
>> Regards,
>>
>> Anthony Liguori
>> 
>
> I think we should use the CPUID instruction (leaf 0x40000000) to detect
> the hypervisor as we are doing in Xen.
>   

Is that leaf reserved for such use by Intel?

Regards,

Anthony Liguori

> Jun
> ---
> Intel Open Source Technology Center
>   


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] State of current pv_ops backend for KVM?

2007-05-30 Thread Anthony Liguori
Zachary Amsden wrote:
>> VMI can determine that it's under VMware since VMware has to publish 
>> an option ROM.  It's quite neat, but overkill for what we're doing 
>> (since we're not going to be using a ROM anyway).
>
> The near-to-being-blessed way to do this is to use a CPUID leaf (I
> believe it is 0x40000000) to identify the hypervisor.  I don't have
> the exact details, but making CPUID 0x40000000 return 'KVMKVMKVMKVM'
> sounds like a pretty safe detection technique.

I apparently missed the memo :-)  I'll update the patch.

Regards,

Anthony Liguori

> We're probably dropping the ROM probing in favor of this for 64-bit.


> Zach
>


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Zachary Amsden
Nakajima, Jun wrote:
> And actually you don't need the write to CR3 to flush TLB because the
> one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is
> updated at the same time?
>
> Jun

It should not be necessary, but I believe this was added as a workaround 
to a PII erratum.  I can't find the erratum, however, and the history of 
using G bits in Linux is complicated (several bugs introduced and many 
intermediate versions of this code).  Since this is not performance 
critical, I think it is probably best to leave the CR3 reload.

However, being unnecessary on modern processors, I already submitted a 
patch to eliminate it on 64-bit (or maybe just told Andi about it, I 
can't recall).

Zach

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Nakajima, Jun
Anthony Liguori wrote:
> Nakajima, Jun wrote:
> > Anthony Liguori wrote:
> > 
> > > Regards,
> > > 
> > > Anthony Liguori
> > > 
> > 
> > I think we should use the CPUID instruction (leaf 0x40000000) to
detect
> > the hypervisor as we are doing in Xen.
> > 
> 
> Is that leaf reserved for such use by Intel?
> 

What I can say is that we (including the H/W teams) reviewed it
internally.

> Regards,
> 
> Anthony Liguori
> 


Jun
---
Intel Open Source Technology Center

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Nakajima, Jun
Zachary Amsden wrote:
> Nakajima, Jun wrote:
> > And actually you don't need the write to CR3 to flush TLB because
the
> > one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is
updated
> > at the same time? 
> > 
> > Jun
> 
> It should not be necessary, but I believe this was added as a
workaround
> to a PII erratum.  I can't find the erratum, however, and the history
of
> using G bits in Linux is complicated (several bugs introduced and many
> intermediate versions of this code).  Since this is not performance
> critical, I think it is probably best to leave the CR3 reload.

I don't recommend this for old processors.

> 
> However, being unnecessary on modern processors, I already submitted a
> patch to eliminate it on 64-bit (or maybe just told Andi about it, I
> can't recall).
> 
> Zach

For KVM, it should be okay as well. But we can replace two CR4 accesses
with just one hypercall.

Jun
---
Intel Open Source Technology Center

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Anthony Liguori
Nakajima, Jun wrote:
> Zachary Amsden wrote:
>   
>> Nakajima, Jun wrote:
>> 
>>> And actually you don't need the write to CR3 to flush TLB because
>>>   
> the
>   
>>> one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is
>>>   
> updated
>   
>>> at the same time? 
>>>
>>> Jun
>>>   
>> It should not be necessary, but I believe this was added as a
>> 
> workaround
>   
>> to a PII erratum.  I can't find the erratum, however, and the history
>> 
> of
>   
>> using G bits in Linux is complicated (several bugs introduced and many
>> intermediate versions of this code).  Since this is not performance
>> critical, I think it is probably best to leave the CR3 reload.
>> 
>
> I don't recommend this for old processors.
>
>   
>> However, being unnecessary on modern processors, I already submitted a
>> patch to eliminate it on 64-bit (or maybe just told Andi about it, I
>> can't recall).
>>
>> Zach
>> 
>
> For KVM, it should be okay as well. But we can replace two CR4 accesses
> with just one hypercall.
>   

I was thinking the same thing :-)

I was actually thinking about adding a hypercall to set/clear a bit in a 
control register.  The thought here is that it would be useful not just 
for the global bit but also for CR0.TS although we would need another 
paravirt_op hook for stts.

Regards,

Anthony Liguori

> Jun
> ---
> Intel Open Source Technology Center
>   


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Nakajima, Jun
Anthony Liguori wrote:
> Nakajima, Jun wrote:



> > For KVM, it should be okay as well. But we can replace two CR4
accesses
> > with just one hypercall. 
> > 
> 
> I was thinking the same thing :-)
> 
> I was actually thinking about adding a hypercall to set/clear a bit in
a
> control register.  The thought here is that it would be useful not
just
> for the global bit but also for CR0.TS although we would need another
> paravirt_op hook for stts.

Given the optimizations for CPU virtualization in the current H/W, I'm
not sure if such hooks are useful. Do you have any performance data that
justify such hooks? 

> 
> Regards,
> 
> Anthony Liguori
> 


Jun
---
Intel Open Source Technology Center

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Zachary Amsden
Anthony Liguori wrote:
>
> I was thinking the same thing :-)
>
> I was actually thinking about adding a hypercall to set/clear a bit in 
> a control register.  The thought here is that it would be useful not 
> just for the global bit but also for CR0.TS although we would need 
> another paravirt_op hook for stts.

You don't need STTS: just cache CR0 value on writes and replace 
read_cr0.  More paravirt_op hooks would likely be frowned upon, we've 
already got too many.

Zach

-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Anthony Liguori
Nakajima, Jun wrote:
> Anthony Liguori wrote:
>   
>
> Given the optimizations for CPU virtualization in the current H/W, I'm
> not sure if such hooks are useful. Do you have any performance data that
> justify such hooks? 
>   

No, I don't.  It was just a thought that was yet to be confirmed.

Regards,

Anthony Liguori

>> Regards,
>>
>> Anthony Liguori
>>
>> 
>
>
> Jun
> ---
> Intel Open Source Technology Center
>
> -
> This SF.net email is sponsored by DB2 Express
> Download DB2 Express C - the FREE version of DB2 express and take
> control of your XML. No limits. Just data. Click to get it now.
> http://sourceforge.net/powerbar/db2/
> ___
> kvm-devel mailing list
> kvm-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/kvm-devel
>
>   


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Rusty Russell
On Wed, 2007-05-30 at 09:52 -0500, Anthony Liguori wrote:
> This patch adds the basic infrastructure for paravirtualizing a KVM
> guest.

Hi Anthony!

Nice patch, comments below.

> Discovery of running under KVM is done by sharing a page of memory
> between
> the guest and host (initially through an MSR write).

I missed the shared page in this patch?  If you are going to do that,
perhaps putting the hypercall magic in that page is a good idea?

> +extern unsigned char hypercall_addr[4];

Perhaps in a header?

> +asm (
> +   ".globl hypercall_addr\n"
> +   ".align 4\n"
> +   "hypercall_addr:\n"
> +   "movl $-38, %eax\n"
> +   "ret\n"
> +);

I don't think we want the hypercall returning Linux error numbers, and
magic numbers are bad too.  ud2 here I think.

> +   para_state->guest_version = KVM_PARA_API_VERSION;
> +   para_state->host_version = -1;
> +   para_state->size = sizeof(*para_state);
> +   para_state->ret = 0;
> +   para_state->hypercall_gpa = __pa(hypercall_addr);

Two versions, size *and* ret?  This seems like overkill...

> +   if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) {
> +   printk(KERN_INFO "KVM guest: WRMSR probe failed.\n");
> +   return -ENOENT;
> +   }

How about printk(KERN_INFO "I am not a KVM guest\n");?

> +static int __init kvm_guest_init(void)
> +{
> +   int rc;
> +
> +   rc = kvm_guest_register_para(smp_processor_id());
> +   if (rc) {
> +   printk(KERN_INFO "paravirt KVM unavailable\n");

Double-printk when KVM isn't detected seems overkill.  Perhaps you could
just fold this all into one function...

(Personal gripe: I consider a variable named "rc" to be an admission of
semantic defeat... "err" would be better here...)

Thanks!
Rusty.


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush

2007-05-30 Thread Rusty Russell
On Wed, 2007-05-30 at 14:22 -0500, Anthony Liguori wrote:
> I was actually thinking about adding a hypercall to set/clear a bit in a 
> control register.  The thought here is that it would be useful not just 
> for the global bit but also for CR0.TS although we would need another 
> paravirt_op hook for stts.

We don't really need one, because Linux (i386) only cares about the TS
bit of cr0.  From lguest (you'd want this per-cpu of course):

static unsigned long current_cr0, current_cr3;
static void lguest_write_cr0(unsigned long val)
{
lazy_hcall(LHCALL_TS, val & 8, 0, 0);
current_cr0 = val;
}

static unsigned long lguest_read_cr0(void)
{
return current_cr0;
}

Cheers,
Rusty.



-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure

2007-05-30 Thread Anthony Liguori
Rusty Russell wrote:
> On Wed, 2007-05-30 at 09:52 -0500, Anthony Liguori wrote:
>   
>> This patch adds the basic infrastructure for paravirtualizing a KVM
>> guest.
>> 
>
> Hi Anthony!
>
>   Nice patch, comments below.
>
>   
>> Discovery of running under KVM is done by sharing a page of memory
>> between
>> the guest and host (initially through an MSR write).
>> 
>
> I missed the shared page in this patch?  If you are going to do that,
> perhaps putting the hypercall magic in that page is a good idea?
>   

para_state is the shared page.  The address is passed to the KVM via the 
MSR (so it's a shared page owned by the guest).

>> +extern unsigned char hypercall_addr[4];
>> 
>
> Perhaps in a header?
>
>   
>> +asm (
>> +   ".globl hypercall_addr\n"
>> +   ".align 4\n"
>> +   "hypercall_addr:\n"
>> +   "movl $-38, %eax\n"
>> +   "ret\n"
>> +);
>> 
>
> I don't think we want the hypercall returning Linux error numbers, and
> magic numbers are bad too.  ud2 here I think.
>   

Yeah, you're not the first one to suggest this.  The thing is, KVM 
already has host-side support for a hypercall API.  I didn't want to 
change that unless I had to.  However, based on the prior feedback re: 
using CPUID, I will be changing it so I'll update this too.

>> +   para_state->guest_version = KVM_PARA_API_VERSION;
>> +   para_state->host_version = -1;
>> +   para_state->size = sizeof(*para_state);
>> +   para_state->ret = 0;
>> +   para_state->hypercall_gpa = __pa(hypercall_addr);
>> 
>
> Two versions, size *and* ret?  This seems like overkill...
>   

Yeah, I agree :-)  I actually am not a huge fan of using version 
numbers.  I think I'm going to try the next patch using a single version 
number and a feature bitmap.  Some of the optimizations (like MMU 
batching) don't make sense in a NPT/EPT environment but the guest 
shouldn't have to be aware of that.

>> +   if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) {
>> +   printk(KERN_INFO "KVM guest: WRMSR probe failed.\n");
>> +   return -ENOENT;
>> +   }
>> 
>
> How about printk(KERN_INFO "I am not a KVM guest\n");?
>
>   
>> +static int __init kvm_guest_init(void)
>> +{
>> +   int rc;
>> +
>> +   rc = kvm_guest_register_para(smp_processor_id());
>> +   if (rc) {
>> +   printk(KERN_INFO "paravirt KVM unavailable\n");
>> 
>
> Double-printk when KVM isn't detected seems overkill.  Perhaps you could
> just fold this all into one function...
>   

Already have.

> (Personal gripe: I consider a variable named "rc" to be an admission of
> semantic defeat... "err" would be better here...)
>   

I'm not sure I agree that one's better than the other.  Although I 
guess if (err) { reads a little better...

Regards,

Anthony Liguori

> Thanks!
> Rusty.
>
>
> -
> This SF.net email is sponsored by DB2 Express
> Download DB2 Express C - the FREE version of DB2 express and take
> control of your XML. No limits. Just data. Click to get it now.
> http://sourceforge.net/powerbar/db2/
> ___
> kvm-devel mailing list
> kvm-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/kvm-devel
>
>   


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


Re: [kvm-devel] Could not start KVM 24

2007-05-30 Thread Dor Laor
>>>
>>>The result of the command:
>>>
>>>/sbin/lsmod|grep kvm
>>>
>>>was:
>>>
>>>KVM   866160
>>>I did not find any /dev/kvm file...
>>
>>You don't have kvm_intel module loaded.
>>If it fails loading please check dmesg.
>
>Thank you Dor!
>This is output of dmesg below. I could not understand why KVM 16 worked
>fine but KVM 25 or 26 does not work in OpenSUSE. Unfortunately, it's
not
>possible to find KVM 16 because this version was replaced.
>
>linux-cyu9:~ # dmesg
>Bootdata ok (command line is root=/dev/sda3 vga=0x314
resume=/dev/sda2
>splash=silent)
>Linux version 2.6.18.2-34-default ([EMAIL PROTECTED]) (gcc version 4.1.2
...snip

>IPv6 over IPv4 tunneling driver
>IA-32 Microcode Update Driver: v1.14a <[EMAIL PROTECTED]>
>audit(1180456935.694:3): audit_pid=2895 old=0 by auid=4294967295
>bootsplash: status on console 0 changed to on
>
>
>

The kvm modules are not loaded. Please insmod kvm and kvm-intel modules.


-
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
___
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel


[kvm-devel] pre-cleanup for SMP: move mmu cache to be VM basis

2007-05-30 Thread Dong, Eddie

Move the per-VCPU mmu_memory_cache structures to a per-VM basis.

Your opinion?

Eddie

diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
index 0632d0b..77989b4 100644
--- a/drivers/kvm/kvm.h
+++ b/drivers/kvm/kvm.h
@@ -300,11 +300,6 @@ struct kvm_vcpu {
 
struct kvm_mmu mmu;
 
-   struct kvm_mmu_memory_cache mmu_pte_chain_cache;
-   struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
-   struct kvm_mmu_memory_cache mmu_page_cache;
-   struct kvm_mmu_memory_cache mmu_page_header_cache;
-
gfn_t last_pt_write_gfn;
int   last_pt_write_count;
 
@@ -383,6 +378,11 @@ struct kvm {
unsigned long rmap_overflow;
struct list_head vm_list;
struct file *filp;
+
+   struct kvm_mmu_memory_cache mmu_pte_chain_cache;
+   struct kvm_mmu_memory_cache mmu_rmap_desc_cache;
+   struct kvm_mmu_memory_cache mmu_page_cache;
+   struct kvm_mmu_memory_cache mmu_page_header_cache;
 };
 
 struct descriptor_table {
diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c
index 46491b4..b2578a8 100644
--- a/drivers/kvm/mmu.c
+++ b/drivers/kvm/mmu.c
@@ -231,19 +231,19 @@ static int __mmu_topup_memory_caches(struct
kvm_vcpu *vcpu, gfp_t gfp_flags)
 {
int r;
 
-   r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache,
+   r = mmu_topup_memory_cache(&vcpu->kvm->mmu_pte_chain_cache,
   pte_chain_cache, 4, gfp_flags);
if (r)
goto out;
-   r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache,
+   r = mmu_topup_memory_cache(&vcpu->kvm->mmu_rmap_desc_cache,
   rmap_desc_cache, 1, gfp_flags);
if (r)
goto out;
-   r = mmu_topup_memory_cache(&vcpu->mmu_page_cache,
+   r = mmu_topup_memory_cache(&vcpu->kvm->mmu_page_cache,
   mmu_page_cache, 4, gfp_flags);
if (r)
goto out;
-   r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache,
+   r = mmu_topup_memory_cache(&vcpu->kvm->mmu_page_header_cache,
   mmu_page_header_cache, 4, gfp_flags);
 out:
return r;
@@ -266,10 +266,10 @@ static int mmu_topup_memory_caches(struct kvm_vcpu
*vcpu)
 
 static void mmu_free_memory_caches(struct kvm_vcpu *vcpu)
 {
-   mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache);
-   mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache);
-   mmu_free_memory_cache(&vcpu->mmu_page_cache);
-   mmu_free_memory_cache(&vcpu->mmu_page_header_cache);
+   mmu_free_memory_cache(&vcpu->kvm->mmu_pte_chain_cache);
+   mmu_free_memory_cache(&vcpu->kvm->mmu_rmap_desc_cache);
+   mmu_free_memory_cache(&vcpu->kvm->mmu_page_cache);
+   mmu_free_memory_cache(&vcpu->kvm->mmu_page_header_cache);
 }
 
 static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc,
@@ -293,26 +293,26 @@ static void mmu_memory_cache_free(struct
kvm_mmu_memory_cache *mc, void *obj)
 
 static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu)
 {
-   return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache,
+   return mmu_memory_cache_alloc(&vcpu->kvm->mmu_pte_chain_cache,
  sizeof(struct kvm_pte_chain));
 }
 
 static void mmu_free_pte_chain(struct kvm_vcpu *vcpu,
   struct kvm_pte_chain *pc)
 {
-   mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc);
+   mmu_memory_cache_free(&vcpu->kvm->mmu_pte_chain_cache, pc);
 }
 
 static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu)
 {
-   return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache,
+   return mmu_memory_cache_alloc(&vcpu->kvm->mmu_rmap_desc_cache,
  sizeof(struct kvm_rmap_desc));
 }
 
 static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu,
   struct kvm_rmap_desc *rd)
 {
-   mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd);
+   mmu_memory_cache_free(&vcpu->kvm->mmu_rmap_desc_cache, rd);
 }
 
 /*
@@ -471,8 +471,8 @@ static void kvm_mmu_free_page(struct kvm_vcpu *vcpu,
 {
ASSERT(is_empty_shadow_page(page_head->spt));
list_del(&page_head->link);
-   mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt);
-   mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head);
+   mmu_memory_cache_free(&vcpu->kvm->mmu_page_cache,
page_head->spt);
+   mmu_memory_cache_free(&vcpu->kvm->mmu_page_header_cache,
page_head);
++vcpu->kvm->n_free_mmu_pages;
 }
 
@@ -489,9 +489,9 @@ static struct kvm_mmu_page
*kvm_mmu_alloc_page(struct kvm_vcpu *vcpu,
if (!vcpu->kvm->n_free_mmu_pages)
return NULL;
 
-   page = mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache,
+   page = mmu_memory_cache_alloc(&vcpu->kvm->mmu_page_header_cache,
  sizeof *page);
-   page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache,
PAGE_SIZE);
+   page->spt = mmu_mem