Re: [kvm-devel] guest PTE write emulation
Avi Kivity wrote: > On a pte update, npte will always be 1. On a pde update, we won't do > anything in mmu_pte_write_new_pte because it doesn't handle > pdes. If we > extend it to handle pdes, then we need either to modify the > new gpde or > to have the update take the quadrant into account. Agree, so we either just skip npte=2 branch or polish it. How about following changes? diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index c85c664..37a7dc5 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1156,7 +1156,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct hlist_node *node, *n; struct hlist_head *bucket; unsigned index; - u64 *spte; + u64 *spte, gpte[2]; unsigned offset = offset_in_page(gpa); unsigned pte_size; unsigned page_offset; @@ -1164,7 +1164,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, unsigned quadrant; int level; int flooded = 0; - int npte; + int npte, i; pgprintk("%s: gpa %llx bytes %d\n", __FUNCTION__, gpa, bytes); if (gfn == vcpu->last_pt_write_gfn) { @@ -1202,6 +1202,7 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, page_offset = offset; level = page->role.level; npte = 1; + gpte[0] = *(long*)new; if (page->role.glevels == PT32_ROOT_LEVEL) { page_offset <<= 1; /* 32->64 */ /* @@ -1209,10 +1210,16 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * only 2MB. So we need to double the offset again * and zap two pdes instead of one. 
*/ - if (level == PT32_ROOT_LEVEL) { + if ((level == PT_DIRECTORY_LEVEL) && + (vcpu->cr4 & CR4_PSE_MASK) && + (gpte[0] & PT_PAGE_SIZE_MASK)) { page_offset &= ~7; /* kill rounding error */ page_offset <<= 1; npte = 2; + gpte[1] = gpte[0]; + gpte[1] += 1 << 21; } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; @@ -1220,9 +1227,9 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, continue; } spte = &page->spt[page_offset / sizeof(*spte)]; - while (npte--) { + for (i=0; i < npte; i++) { mmu_pte_write_zap_pte(vcpu, page, spte); - mmu_pte_write_new_pte(vcpu, page, spte, new, bytes); + mmu_pte_write_new_pte(vcpu, page, spte, &gpte[i], bytes); ++spte; } } - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] guest PTE write emulation
Dong, Eddie wrote: > Avi Kivity wrote: > >> On a pte update, npte will always be 1. On a pde update, we won't do >> anything in mmu_pte_write_new_pte because it doesn't handle >> pdes. If we >> extend it to handle pdes, then we need either to modify the >> new gpde or >> to have the update take the quadrant into account. >> > > Agree, so we either just skip npte=2 branch or polish it. > How about following changes? > Isn't it all dead code, as gpte[1] will never be used? Maybe just move the page->role.level test from mmu_pte_write_new_pte() back to kvm_mmu_pte_write(), so that it's clear that the new data isn't used for the non-pte case? -- error compiling committee.c: too many arguments to function - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] guest PTE write emulation
Avi Kivity wrote: > Dong, Eddie wrote: >> Avi Kivity wrote: >> >>> On a pte update, npte will always be 1. On a pde update, we won't >>> do anything in mmu_pte_write_new_pte because it doesn't handle >>> pdes. If we extend it to handle pdes, then we need either to >>> modify the new gpde or to have the update take the quadrant into >>> account. >>> >> >> Agree, so we either just skip npte=2 branch or polish it. How about >> following changes? >> > > Isn't it all dead code, as gpte[1] will never be used? If we are removing dead code, we can do the following: diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index c85c664..7d7cd42 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -1209,11 +1209,6 @@ void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa, * only 2MB. So we need to double the offset again * and zap two pdes instead of one. */ - if (level == PT32_ROOT_LEVEL) { - page_offset &= ~7; /* kill rounding error */ - page_offset <<= 1; - npte = 2; - } quadrant = page_offset >> PAGE_SHIFT; page_offset &= ~PAGE_MASK; if (quadrant != page->role.quadrant) > > Maybe just move the page->role.level test from mmu_pte_write_new_pte() The role test needs to be combined with the guest CR4 and PS bits. The current code finds about 3.5K occurrences of fake 4M pages. > back to kvm_mmu_pte_write(), so that it's clear that the new > data isn't > used for the non-pte case? Agree. Eddie - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] [PATCH 0/3] KVM paravirt_ops implementation
This is the start of a paravirt_ops implementation for KVM. Most of it was done by Ingo Molnar, I just moved things around a bit. I don't think there's a measurable performance benefit just yet but there are a few more optimizations that I think we can get in time for 2.6.23 that will be measurable. Regards, Anthony Liguori - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
Regards, Anthony Liguori Subject: [PATCH] Add KVM paravirt_ops implementation From: Anthony Liguori <[EMAIL PROTECTED]> This patch adds the basic infrastructure for paravirtualizing a KVM guest. Discovery of running under KVM is done by sharing a page of memory between the guest and host (initially through an MSR write). This is based on a patch written by Ingo Molnar. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> Index: kvm/arch/i386/Kconfig === --- kvm.orig/arch/i386/Kconfig 2007-05-30 08:42:31.0 -0500 +++ kvm/arch/i386/Kconfig 2007-05-30 08:42:38.0 -0500 @@ -231,6 +231,13 @@ at the moment), by linking the kernel to a GPL-ed ROM module provided by the hypervisor. +config KVM_GUEST + bool "KVM paravirt-ops support" + depends on PARAVIRT + help + This option enables various optimizations for running under the KVM + hypervisor. + config ACPI_SRAT bool default y Index: kvm/arch/i386/kernel/Makefile === --- kvm.orig/arch/i386/kernel/Makefile 2007-05-30 08:42:26.0 -0500 +++ kvm/arch/i386/kernel/Makefile 2007-05-30 08:42:38.0 -0500 @@ -41,6 +41,7 @@ obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_VMI) += vmi.o vmiclock.o +obj-$(CONFIG_KVM_GUEST) += kvm.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-y+= pcspeaker.o Index: kvm/arch/i386/kernel/kvm.c === --- /dev/null 1970-01-01 00:00:00.0 + +++ kvm/arch/i386/kernel/kvm.c 2007-05-30 09:29:37.0 -0500 @@ -0,0 +1,100 @@ +/* + * KVM paravirt_ops implementation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2007, Red Hat, Inc., Ingo Molnar <[EMAIL PROTECTED]> + * Copyright IBM Corporation, 2007 + * Authors: Anthony Liguori <[EMAIL PROTECTED]> + */ + +#include +#include +#include +#include + +static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state); +extern unsigned char hypercall_addr[4]; + +static void kvm_guest_setup(void) +{ + paravirt_ops.name = "KVM"; + paravirt_ops.paravirt_enabled = 1; +} + +/* + * This is the vm-syscall address - to be patched by the host to + * VMCALL (Intel) or VMMCALL (AMD), depending on the CPU model: + */ +asm ( + ".globl hypercall_addr\n" + ".align 4\n" + "hypercall_addr:\n" + "movl $-38, %eax\n" + "ret\n" +); + +static int kvm_guest_register_para(int cpu) +{ + struct kvm_vcpu_para_state *para_state = &per_cpu(para_state, cpu); + + printk(KERN_DEBUG "kvm guest on VCPU#%d: trying to register para_state %p\n", + cpu, para_state); + + /* + * Try to write to a magic MSR (which is invalid on any real CPU), + * and thus signal to KVM that we wish to entering paravirtualized + * mode: + */ + para_state->guest_version = KVM_PARA_API_VERSION; + para_state->host_version = -1; + para_state->size = sizeof(*para_state); + para_state->ret = 0; + para_state->hypercall_gpa = __pa(hypercall_addr); + + if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) { + printk(KERN_INFO "KVM guest: WRMSR probe failed.\n"); + return -ENOENT; + } + + printk(KERN_DEBUG "kvm guest: host returned %d\n", para_state->ret); + printk(KERN_DEBUG "kvm guest: host version: %d\n", para_state->host_version); + printk(KERN_DEBUG "kvm guest: syscall entry: %02x %02x %02x %02x\n", + hypercall_addr[0], hypercall_addr[1], + hypercall_addr[2], hypercall_addr[3]); + + if (para_state->ret) { + printk(KERN_ERR "kvm guest: host refused 
registration.\n"); + return para_state->ret; + } + + return 0; +} + +static int __init kvm_guest_init(void) +{ + int rc; + + rc = kvm_guest_register_para(smp_processor_id()); + if (rc) { + printk(KERN_INFO "paravirt KVM unavailable\n"); + goto out; + } + + kvm_guest_setup(); + out: + return 0; +} +core_initcall(kvm_guest_init); - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists
[kvm-devel] [PATCH 2/3][PARAVIRT] Make IO delay a NOP
Regards, Anthony Liguori Subject: [PATCH][PARAVIRT] Make IO delay a NOP for paravirt guests No delay is required in between PIO operations under KVM guests so make IO delay a NOP. This was originally part of Ingo Molnar's paravirt series. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> Index: kvm/arch/i386/kernel/kvm.c === --- kvm.orig/arch/i386/kernel/kvm.c 2007-05-30 09:30:42.0 -0500 +++ kvm/arch/i386/kernel/kvm.c 2007-05-30 09:31:46.0 -0500 @@ -28,9 +28,17 @@ static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state); extern unsigned char hypercall_addr[4]; +/* + * No need for any "IO delay" on KVM + */ +static void kvm_io_delay(void) +{ +} + static void kvm_guest_setup(void) { paravirt_ops.name = "KVM"; + paravirt_ops.io_delay = kvm_io_delay; paravirt_ops.paravirt_enabled = 1; } - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Regards, Anthony Liguori Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush This patch eliminates the CR3 read (which would cause a VM exit) in the TLB flush path. The patch is based on Ingo Molnar's paravirt series. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> Index: kvm/arch/i386/kernel/kvm.c === --- kvm.orig/arch/i386/kernel/kvm.c 2007-05-30 09:13:48.0 -0500 +++ kvm/arch/i386/kernel/kvm.c 2007-05-30 09:14:24.0 -0500 @@ -24,11 +24,35 @@ #include #include #include +#include +#include +#include static DEFINE_PER_CPU(struct kvm_vcpu_para_state, para_state); extern unsigned char hypercall_addr[4]; /* + * Avoid the VM exit upon cr3 load by using the cached + * ->active_mm->pgd value: + */ +static void kvm_flush_tlb_user(void) +{ + write_cr3(__pa(current->active_mm->pgd)); +} + +/* + * Avoid VM exit for cr3 read by calling into kvm_flush_tlb_user + */ +static fastcall void kvm_flush_tlb_kernel(void) +{ + unsigned long orig_cr4 = read_cr4(); + + write_cr4(orig_cr4 & ~X86_CR4_PGE); + kvm_flush_tlb_user(); + write_cr4(orig_cr4); +} + +/* * No need for any "IO delay" on KVM */ static void kvm_io_delay(void) @@ -38,6 +62,8 @@ static void kvm_guest_setup(void) { paravirt_ops.name = "KVM"; + paravirt_ops.flush_tlb_user = kvm_flush_tlb_user; + paravirt_ops.flush_tlb_kernel = kvm_flush_tlb_kernel; paravirt_ops.io_delay = kvm_io_delay; paravirt_ops.paravirt_enabled = 1; } - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
On Wednesday 30 May 2007 16:53:41 Anthony Liguori wrote: > Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush > > This patch eliminates the CR3 read (which would cause a VM exit) in the TLB > flush path. The patch is based on Ingo Molnar's paravirt series. > This change could be just done generically for the native architecture, couldn't it? -Andi - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] Hypercall numbering and the -rt tree
Hi Ingo, In my last series, I avoided submitting any patches that used hypercalls. Part of the reason is that I know that your tree already has assigned hypercalls out to various things. Some make sense (like flush_cr3_cache) but some are, at least, contentious (like the pv disk hypercalls). I'd like to start using hypercalls but this means squashing over some of the hypercalls that are defined in the -rt tree. Do you have a problem with this? Is this going to cause major breakages for anyone? Regards, Anthony Liguori - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Andi Kleen wrote: On Wednesday 30 May 2007 16:53:41 Anthony Liguori wrote: Subject: [PATCH][PARAVIRT] Eliminate unnecessary CR3 read in TLB flush This patch eliminates the CR3 read (which would cause a VM exit) in the TLB flush path. The patch is based on Ingo Molnar's paravirt series. This change could be just done generically for the native architecture, couldn't it? Sure. It adds a few more cycles onto native though (two memory reads, and some math). How does the following look? Regards, Anthony Liguori -Andi Subject: [PATCH] Avoid reading CR3 on TLB flush From: Anthony Liguori <[EMAIL PROTECTED]> In a virtualized environment, there is significant overhead in reading and writing control registers. Since we already have the value of CR3 in current->active_mm->pgd, we can avoid taking the exit on CR3 read when doing a TLB flush. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> Index: kvm/include/asm-i386/tlbflush.h === --- kvm.orig/include/asm-i386/tlbflush.h 2007-05-30 10:22:48.0 -0500 +++ kvm/include/asm-i386/tlbflush.h 2007-05-30 10:22:54.0 -0500 @@ -14,13 +14,12 @@ #define __native_flush_tlb() \ do {\ - unsigned int tmpreg; \ + unsigned int cr3 = __pa(current->active_mm->pgd); \ \ __asm__ __volatile__( \ - "movl %%cr3, %0; \n" \ "movl %0, %%cr3; # flush TLB \n" \ - : "=r" (tmpreg) \ - :: "memory"); \ + :: "r" (cr3) \ + : "memory"); \ } while (0) /* @@ -29,18 +28,18 @@ */ #define __native_flush_tlb_global() \ do {\ - unsigned int tmpreg, cr4, cr4_orig; \ + unsigned int cr3 = __pa(current->active_mm->pgd); \ + unsigned int cr4, cr4_orig;\ \ __asm__ __volatile__( \ - "movl %%cr4, %2; # turn off PGE \n" \ - "movl %2, %1;\n" \ - "andl %3, %1;\n" \ - "movl %1, %%cr4; \n" \ - "movl %%cr3, %0; \n" \ - "movl %0, %%cr3; # flush TLB\n" \ - "movl %2, %%cr4; # turn PGE back on \n" \ - : "=&r" (tmpreg), "=&r" (cr4), "=&r" (cr4_orig) \ - : "i" (~X86_CR4_PGE)\ + "movl %%cr4, %1; # turn off PGE \n" \ + "movl %1, %0;\n" \ + "andl %2, %0;\n" \ + "movl %0, %%cr4; \n" \ + "movl 
%3, %%cr3; # flush TLB\n" \ + "movl %1, %%cr4; # turn PGE back on \n" \ + : "=&r" (cr4), "=&r" (cr4_orig) \ + : "i" (~X86_CR4_PGE), "r" (cr3) \ : "memory"); \ } while (0) - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Anthony Liguori wrote: > Sure. It adds a few more cycles onto native though (two memory reads, > and some math). As opposed to a serializing control-register read? I think that's probably a win. J - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] Could not start KVM 24
>>> >I have used KVM 16 in x86_64 OpenSUSE 10.2 without problems. >> >>> >I have recently downloaded and installed x86_64 RPMs of KVM 24. >> >>> >But I could not start it: >> >>> > >> >>> >#qemu-kvm >> >>> >open /dev/kvm: No such file or directory >> >>> >Could not initialize KVM, will disable KVM support >> >>> > >> >>> >I have Intel VT enabled in my PC and >> >>> >MODULES_LOADED_ON_BOOT="kvm kvm-intel" >> >>> > >> >>> Please make sure the modules are really loaded (/sbin/lsmod|grep kvm) >> >>> Check of you see /dev/kvm and its permissions. >> >> >> >>The result of the command: >> >>/sbin/lsmod|grep kvm >> >>was: >> >>KVM 866160 >>I did not found any /dev/kvm file... > >You don't have kvm_intel module loaded. >If it fails loading please check dmesg. Thank you Dor! This is output of dmesg below. I could not understand why KVM 16 worked fine but KVM 25 or 26 does not work in OpenSUSE. Unfortunately, it's not possible to find KVM 16 because this version was replaced. linux-cyu9:~ # dmesg Bootdata ok (command line is root=/dev/sda3 vga=0x314resume=/dev/sda2 splash=silent) Linux version 2.6.18.2-34-default ([EMAIL PROTECTED]) (gcc version 4.1.2 20061115 (prerelease) (SUSE Linux)) #1 SMP Mon Nov 27 11:46:27 UTC 2006 BIOS-provided physical RAM map: BIOS-e820: - 0008f000 (usable) BIOS-e820: 0008f000 - 000a (reserved) BIOS-e820: 000e - 0010 (reserved) BIOS-e820: 0010 - 7e599000 (usable) BIOS-e820: 7e599000 - 7e5a6000 (reserved) BIOS-e820: 7e5a6000 - 7e656000 (usable) BIOS-e820: 7e656000 - 7e6a7000 (ACPI NVS) BIOS-e820: 7e6a7000 - 7e6ac000 (ACPI data) BIOS-e820: 7e6ac000 - 7e6f2000 (ACPI NVS) BIOS-e820: 7e6f2000 - 7e6f3000 (usable) BIOS-e820: 7e6f3000 - 7e6ff000 (ACPI data) BIOS-e820: 7e6ff000 - 7e70 (usable) BIOS-e820: 7e70 - 7f00 (reserved) BIOS-e820: fff0 - 0001 (reserved) DMI 2.4 present. 
ACPI: RSDP (v000 INTEL ) @ 0x000fe020 ACPI: RSDT (v001 INTEL DG965WH 0x0629 0x0113) @ 0x7e6fd038 ACPI: FADT (v001 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x7e6fc000 ACPI: MADT (v001 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x7e6f6000 ACPI: WDDT (v001 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x7e6f5000 ACPI: MCFG (v001 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x7e6f4000 ACPI: ASF! (v032 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x7e6f3000 ACPI: SSDT (v001 INTEL CpuPm 0x0629 MSFT 0x0113) @ 0x7e6ab000 ACPI: SSDT (v001 INTEL Cpu0Ist 0x0629 MSFT 0x0113) @ 0x7e6aa000 ACPI: SSDT (v001 INTEL Cpu1Ist 0x0629 MSFT 0x0113) @ 0x7e6a9000 ACPI: SSDT (v001 INTEL Cpu2Ist 0x0629 MSFT 0x0113) @ 0x7e6a8000 ACPI: SSDT (v001 INTEL Cpu3Ist 0x0629 MSFT 0x0113) @ 0x7e6a7000 ACPI: DSDT (v001 INTEL DG965WH 0x0629 MSFT 0x0113) @ 0x No NUMA configuration found Faking a node at -7e70 Bootmem setup node 0 -7e70 On node 0 totalpages: 509464 DMA zone: 2877 pages, LIFO batch:0 DMA32 zone: 506587 pages, LIFO batch:31 ACPI: PM-Timer IO Port: 0x408 ACPI: Local APIC address 0xfee0 ACPI: LAPIC (acpi_id[0x01] lapic_id[0x00] enabled) Processor #0 6:15 APIC version 20 ACPI: LAPIC (acpi_id[0x02] lapic_id[0x01] enabled) Processor #1 6:15 APIC version 20 ACPI: LAPIC (acpi_id[0x03] lapic_id[0x82] disabled) ACPI: LAPIC (acpi_id[0x04] lapic_id[0x83] disabled) ACPI: LAPIC_NMI (acpi_id[0x01] dfl dfl lint[0x1]) ACPI: LAPIC_NMI (acpi_id[0x02] dfl dfl lint[0x1]) ACPI: IOAPIC (id[0x02] address[0xfec0] gsi_base[0]) IOAPIC[0]: apic_id 2, version 32, address 0xfec0, GSI 0-23 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 dfl dfl) ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) ACPI: IRQ0 used by override. ACPI: IRQ2 used by override. ACPI: IRQ9 used by override. Setting APIC routing to physical flat Using ACPI (MADT) for SMP configuration information Allocating PCI resources starting at 8000 (gap: 7f00:80f0) SMP: Allowing 4 CPUs, 2 hotplug CPUs Built 1 zonelists. 
Total pages: 509464 Kernel command line: root=/dev/sda3 vga=0x314resume=/dev/sda2 splash=silent bootsplash: silent mode. Initializing CPU#0 PID hash table entries: 4096 (order: 12, 32768 bytes) time.c: Using 3.579545 MHz WALL PM GTOD PIT/TSC timer. time.c: Detected 2131.258 MHz processor. Console: colour dummy device 80x25 Dentry cache hash table entries: 262144 (order: 9, 2097152 bytes) Inode-cache hash table entries: 131072 (order: 8, 1048576 bytes) Checking aperture... Memory: 2031144k/2071552k available (1915k kernel code, 39232k reserved, 1278k data, 188k init) Calibratin
Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
Anthony Liguori wrote: > Regards, > > Anthony Liguori I think we should use the CPUID instruction (leaf 0x40000000) to detect the hypervisor as we are doing in Xen. Jun --- Intel Open Source Technology Center - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Jeremy Fitzhardinge wrote: > Anthony Liguori wrote: > > Sure. It adds a few more cycles onto native though (two memory reads, > > and some math). > > As opposed to a serializing control-register read? I think that's > probably a win. > > J > And actually you don't need the write to CR3 to flush TLB because the one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is updated at the same time? Jun --- Intel Open Source Technology Center - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] State of current pv_ops backend for KVM?
Anthony Liguori wrote: > Rusty Russell wrote: >> On Mon, 2007-05-28 at 20:54 -0500, Anthony Liguori wrote: >> >>> Howdy, >>> >>> Does anyone know what the state of a pv_ops backend for KVM is? I >>> know Ingo has an implementation that implements CR3 caching but I >>> don't see any branches in Avi's git tree. >>> >>> Perhaps we should try for a simple pv_ops backend for 2.6.23 seeing >>> as how the host infrastructure is there? I'd be willing to do some >>> leg work here... >>> >> >> "Me too". It's at least worth toying with the mmu batching stuff to see >> if that can shave some cycles there: it seems that cr3 caching is the >> main win for context switch so batching there won't help. >> > > I'm working on a simple patch that just adds the infrastructure. > >> IIRC Ingo used a magic MSR to detect kvm, and there was a question mark >> over that. > > I am not too worried about using an MSR. There's really no foolproof > method here. The only thing to do IMHO is move the kvm pv_ops backend > into a separate file and add appropriate Kconfig stuff. > >> VMI uses a different technique, which is probably worth >> considering... >> > > VMI can determine that it's under VMware since VMware has to publish > an option ROM. It's quite neat, but overkill for what we're doing > (since we're not going to be using a ROM anyway). The nearly blessed way to do this is to use a CPUID leaf (I believe it is 0x40000000) to identify the hypervisor. I don't have the exact details, but making CPUID 0x40000000 return 'KVMKVMKVMKVM' sounds like a pretty safe detection technique. We're probably dropping the ROM probing in favor of this for 64-bit. Zach - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
Nakajima, Jun wrote: > Anthony Liguori wrote: > >> Regards, >> >> Anthony Liguori >> > > I think we should use the CPUID instruction (leaf 0x40000000) to detect > the hypervisor as we are doing in Xen. > Is that leaf reserved for such use by Intel? Regards, Anthony Liguori > Jun > --- > Intel Open Source Technology Center > - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] State of current pv_ops backend for KVM?
Zachary Amsden wrote: >> VMI can determine that it's under VMware since VMware has to publish >> an option ROM. It's quite neat, but overkill for what we're doing >> (since we're not going to be using a ROM anyway). > > The nearly blessed way to do this is to use a CPUID leaf (I > believe it is 0x40000000) to identify the hypervisor. I don't have > the exact details, but making CPUID 0x40000000 return 'KVMKVMKVMKVM' > sounds like a pretty safe detection technique. I apparently missed the memo :-) I'll update the patch. Regards, Anthony Liguori > We're probably dropping the ROM probing in favor of this for 64-bit. > Zach > - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Nakajima, Jun wrote: > And actually you don't need the write to CR3 to flush TLB because the > one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is > updated at the same time? > > Jun It should not be necessary, but I believe this was added as a workaround to a PII erratum. I can't find the erratum, however, and the history of using G bits in Linux is complicated (several bugs introduced and many intermediate versions of this code). Since this is not performance critical, I think it is probably best to leave the CR3 reload. However, being unnecessary on modern processors, I already submitted a patch to eliminate it on 64-bit (or maybe just told Andi about it, I can't recall). Zach - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
Anthony Liguori wrote: > Nakajima, Jun wrote: > > Anthony Liguori wrote: > > > > > Regards, > > > > > > Anthony Liguori > > > > > > > I think we should use the CPUID instruction (leaf 0x40000000) to detect > > the hypervisor as we are doing in Xen. > > > > Is that leaf reserved for such use by Intel? > What I can say is that we (including the H/W teams) reviewed it internally. > Regards, > > Anthony Liguori > Jun --- Intel Open Source Technology Center - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Zachary Amsden wrote: > Nakajima, Jun wrote: > > And actually you don't need the write to CR3 to flush TLB because the > > one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is updated > > at the same time? > > > > Jun > > It should not be necessary, but I believe this was added as a workaround > to a PII erratum. I can't find the erratum, however, and the history of > using G bits in Linux is complicated (several bugs introduced and many > intermediate versions of this code). Since this is not performance > critical, I think it is probably best to leave the CR3 reload. I don't recommend this for old processors. > > However, being unnecessary on modern processors, I already submitted a > patch to eliminate it on 64-bit (or maybe just told Andi about it, I > can't recall). > > Zach For KVM, it should be okay as well. But we can replace two CR4 accesses with just one hypercall. Jun --- Intel Open Source Technology Center - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Nakajima, Jun wrote: > Zachary Amsden wrote: > >> Nakajima, Jun wrote: >> >>> And actually you don't need the write to CR3 to flush TLB because >>> > the > >>> one to CR4 does it. Or does kvm_flush_tlb_kernel assume that CR3 is >>> > updated > >>> at the same time? >>> >>> Jun >>> >> It should not be necessary, but I believe this was added as a >> > workaround > >> to a PII erratum. I can't find the erratum, however, and the history >> > of > >> using G bits in Linux is complicated (several bugs introduced and many >> intermediate versions of this code). Since this is not performance >> critical, I think it is probably best to leave the CR3 reload. >> > > I don't recommend this for old processors. > > >> However, being unnecessary on modern processors, I already submitted a >> patch to eliminate it on 64-bit (or maybe just told Andi about it, I >> can't recall). >> >> Zach >> > > For KVM, it should be okay as well. But we can replace two CR4 accesses > with just one hypercall. > I was thinking the same thing :-) I was actually thinking about adding a hypercall to set/clear a bit in a control register. The thought here is that it would be useful not just for the global bit but also for CR0.TS although we would need another paravirt_op hook for stts. Regards, Anthony Liguori > Jun > --- > Intel Open Source Technology Center > - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Anthony Liguori wrote: > Nakajima, Jun wrote: > > For KVM, it should be okay as well. But we can replace two CR4 accesses > > with just one hypercall. > > > > I was thinking the same thing :-) > > I was actually thinking about adding a hypercall to set/clear a bit in a > control register. The thought here is that it would be useful not just > for the global bit but also for CR0.TS although we would need another > paravirt_op hook for stts. Given the optimizations for CPU virtualization in the current H/W, I'm not sure if such hooks are useful. Do you have any performance data that justify such hooks? > > Regards, > > Anthony Liguori > Jun --- Intel Open Source Technology Center - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Anthony Liguori wrote: > > I was thinking the same thing :-) > > I was actually thinking about adding a hypercall to set/clear a bit in > a control register. The thought here is that it would be useful not > just for the global bit but also for CR0.TS although we would need > another paravirt_op hook for stts. You don't need STTS: just cache CR0 value on writes and replace read_cr0. More paravirt_op hooks would likely be frowned upon, we've already got too many. Zach - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
Nakajima, Jun wrote: > Anthony Liguori wrote: > > > Given the optimizations for CPU virtualization in the current H/W, I'm > not sure if such hooks are useful. Do you have any performance data that > justify such hooks? > No, I don't. It was just a thought that was yet to be confirmed. Regards, Anthony Liguori >> Regards, >> >> Anthony Liguori >> >> > > > Jun > --- > Intel Open Source Technology Center > > - > This SF.net email is sponsored by DB2 Express > Download DB2 Express C - the FREE version of DB2 express and take > control of your XML. No limits. Just data. Click to get it now. > http://sourceforge.net/powerbar/db2/ > ___ > kvm-devel mailing list > kvm-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/kvm-devel > > - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
On Wed, 2007-05-30 at 09:52 -0500, Anthony Liguori wrote: > This patch adds the basic infrastructure for paravirtualizing a KVM > guest. Hi Anthony! Nice patch, comments below. > Discovery of running under KVM is done by sharing a page of memory > between > the guest and host (initially through an MSR write). I missed the shared page in this patch? If you are going to do that, perhaps putting the hypercall magic in that page is a good idea? > +extern unsigned char hypercall_addr[4]; Perhaps in a header? > +asm ( > + ".globl hypercall_addr\n" > + ".align 4\n" > + "hypercall_addr:\n" > + "movl $-38, %eax\n" > + "ret\n" > +); I don't think we want the hypercall returning Linux error numbers, and magic numbers are bad too. ud2 here I think. > + para_state->guest_version = KVM_PARA_API_VERSION; > + para_state->host_version = -1; > + para_state->size = sizeof(*para_state); > + para_state->ret = 0; > + para_state->hypercall_gpa = __pa(hypercall_addr); Two versions, size *and* ret? This seems like overkill... > + if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) { > + printk(KERN_INFO "KVM guest: WRMSR probe failed.\n"); > + return -ENOENT; > + } How about printk(KERN_INFO "I am not a KVM guest\n");? > +static int __init kvm_guest_init(void) > +{ > + int rc; > + > + rc = kvm_guest_register_para(smp_processor_id()); > + if (rc) { > + printk(KERN_INFO "paravirt KVM unavailable\n"); Double-printk when KVM isn't detected seems overkill. Perhaps you could just fold this all into one function... (Personal gripe: I consider a variable named "rc" to be an admission of semantic defeat... "err" would be better here...) Thanks! Rusty. - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 3/3] Eliminate read_cr3 on TLB flush
On Wed, 2007-05-30 at 14:22 -0500, Anthony Liguori wrote: > I was actually thinking about adding a hypercall to set/clear a bit in a > control register. The thought here is that it would be useful not just > for the global bit but also for CR0.TS although we would need another > paravirt_op hook for stts. We don't really need one, because Linux (i386) only cares about the TS bit of cr0. From lguest (you'd want this per-cpu of course): static unsigned long current_cr0, current_cr3; static void lguest_write_cr0(unsigned long val) { lazy_hcall(LHCALL_TS, val & 8, 0, 0); current_cr0 = val; } static unsigned long lguest_read_cr0(void) { return current_cr0; } Cheers, Rusty. - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH 1/3] KVM paravirt_ops infrastructure
Rusty Russell wrote: > On Wed, 2007-05-30 at 09:52 -0500, Anthony Liguori wrote: > >> This patch adds the basic infrastructure for paravirtualizing a KVM >> guest. >> > > Hi Anthony! > > Nice patch, comments below. > > >> Discovery of running under KVM is done by sharing a page of memory >> between >> the guest and host (initially through an MSR write). >> > > I missed the shared page in this patch? If you are going to do that, > perhaps putting the hypercall magic in that page is a good idea? > para_state is the shared page. The address is passed to the KVM via the MSR (so it's a shared page owned by the guest). >> +extern unsigned char hypercall_addr[4]; >> > > Perhaps in a header? > > >> +asm ( >> + ".globl hypercall_addr\n" >> + ".align 4\n" >> + "hypercall_addr:\n" >> + "movl $-38, %eax\n" >> + "ret\n" >> +); >> > > I don't think we want the hypercall returning Linux error numbers, and > magic numbers are bad too. ud2 here I think. > Yeah, you're not the first one to suggest this. The thing is, KVM already has host-side support for a hypercall API. I didn't want to change that unless I had to. However, based on the prior feedback re: using CPUID, I will be changing it so I'll update this too. >> + para_state->guest_version = KVM_PARA_API_VERSION; >> + para_state->host_version = -1; >> + para_state->size = sizeof(*para_state); >> + para_state->ret = 0; >> + para_state->hypercall_gpa = __pa(hypercall_addr); >> > > Two versions, size *and* ret? This seems like overkill... > Yeah, I agree :-) I actually am not a huge fan of using version numbers. I think I'm going to try the next patch using a single version number and a feature bitmap. Some of the optimizations (like MMU batching) don't make sense in a NPT/EPT environment but the guest shouldn't have to be aware of that. 
>> + if (wrmsr_safe(MSR_KVM_API_MAGIC, __pa(para_state), 0)) { >> + printk(KERN_INFO "KVM guest: WRMSR probe failed.\n"); >> + return -ENOENT; >> + } >> > > How about printk(KERN_INFO "I am not a KVM guest\n");? > > >> +static int __init kvm_guest_init(void) >> +{ >> + int rc; >> + >> + rc = kvm_guest_register_para(smp_processor_id()); >> + if (rc) { >> + printk(KERN_INFO "paravirt KVM unavailable\n"); >> > > Double-printk when KVM isn't detected seems overkill. Perhaps you could > just fold this all into one function... > Already have. > (Personal gripe: I consider a variable named "rc" to be an admission of > semantic defeat... "err" would be better here...) > I'm not sure I agree that's one's better than the other. Although I guess if (err) { reads a little better... Regards, Anthony Liguori > Thanks! > Rusty. > > > - > This SF.net email is sponsored by DB2 Express > Download DB2 Express C - the FREE version of DB2 express and take > control of your XML. No limits. Just data. Click to get it now. > http://sourceforge.net/powerbar/db2/ > ___ > kvm-devel mailing list > kvm-devel@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/kvm-devel > > - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] Could not start KVM 24
>>> >>>The result of the command: >>> >>>/sbin/lsmod|grep kvm >>> >>>was: >>> >>>KVM 866160 >>>I did not found any /dev/kvm file... >> >>You don't have kvm_intel module loaded. >>If it fails loading please check dmesg. > >Thank you Dor! >This is output of dmesg below. I could not understand why KVM 16 worked >fine but KVM 25 or 26 does not work in OpenSUSE. Unfortunately, it's not >possible to find KVM 16 because this version was replaced. > >linux-cyu9:~ # dmesg >Bootdata ok (command line is root=/dev/sda3 vga=0x314 resume=/dev/sda2 >splash=silent) >Linux version 2.6.18.2-34-default ([EMAIL PROTECTED]) (gcc version 4.1.2 ...snip >IPv6 over IPv4 tunneling driver >IA-32 Microcode Update Driver: v1.14a <[EMAIL PROTECTED]> >audit(1180456935.694:3): audit_pid=2895 old=0 by auid=4294967295 >bootsplash: status on console 0 changed to on > > > The kvm modules are not loaded. Please insmod kvm and kvm-intel modules. - This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] pre-cleanup for SMP: move mmu cache to be VM basis
Move the per-VCPU mmu_memory_cache to a per-VM basis. Your opinion? Eddie diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h index 0632d0b..77989b4 100644 --- a/drivers/kvm/kvm.h +++ b/drivers/kvm/kvm.h @@ -300,11 +300,6 @@ struct kvm_vcpu { struct kvm_mmu mmu; - struct kvm_mmu_memory_cache mmu_pte_chain_cache; - struct kvm_mmu_memory_cache mmu_rmap_desc_cache; - struct kvm_mmu_memory_cache mmu_page_cache; - struct kvm_mmu_memory_cache mmu_page_header_cache; - gfn_t last_pt_write_gfn; int last_pt_write_count; @@ -383,6 +378,11 @@ struct kvm { unsigned long rmap_overflow; struct list_head vm_list; struct file *filp; + + struct kvm_mmu_memory_cache mmu_pte_chain_cache; + struct kvm_mmu_memory_cache mmu_rmap_desc_cache; + struct kvm_mmu_memory_cache mmu_page_cache; + struct kvm_mmu_memory_cache mmu_page_header_cache; }; struct descriptor_table { diff --git a/drivers/kvm/mmu.c b/drivers/kvm/mmu.c index 46491b4..b2578a8 100644 --- a/drivers/kvm/mmu.c +++ b/drivers/kvm/mmu.c @@ -231,19 +231,19 @@ static int __mmu_topup_memory_caches(struct kvm_vcpu *vcpu, gfp_t gfp_flags) { int r; - r = mmu_topup_memory_cache(&vcpu->mmu_pte_chain_cache, + r = mmu_topup_memory_cache(&vcpu->kvm->mmu_pte_chain_cache, pte_chain_cache, 4, gfp_flags); if (r) goto out; - r = mmu_topup_memory_cache(&vcpu->mmu_rmap_desc_cache, + r = mmu_topup_memory_cache(&vcpu->kvm->mmu_rmap_desc_cache, rmap_desc_cache, 1, gfp_flags); if (r) goto out; - r = mmu_topup_memory_cache(&vcpu->mmu_page_cache, + r = mmu_topup_memory_cache(&vcpu->kvm->mmu_page_cache, mmu_page_cache, 4, gfp_flags); if (r) goto out; - r = mmu_topup_memory_cache(&vcpu->mmu_page_header_cache, + r = mmu_topup_memory_cache(&vcpu->kvm->mmu_page_header_cache, mmu_page_header_cache, 4, gfp_flags); out: return r; @@ -266,10 +266,10 @@ static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu) static void mmu_free_memory_caches(struct kvm_vcpu *vcpu) { - mmu_free_memory_cache(&vcpu->mmu_pte_chain_cache); - mmu_free_memory_cache(&vcpu->mmu_rmap_desc_cache); 
- mmu_free_memory_cache(&vcpu->mmu_page_cache); - mmu_free_memory_cache(&vcpu->mmu_page_header_cache); + mmu_free_memory_cache(&vcpu->kvm->mmu_pte_chain_cache); + mmu_free_memory_cache(&vcpu->kvm->mmu_rmap_desc_cache); + mmu_free_memory_cache(&vcpu->kvm->mmu_page_cache); + mmu_free_memory_cache(&vcpu->kvm->mmu_page_header_cache); } static void *mmu_memory_cache_alloc(struct kvm_mmu_memory_cache *mc, @@ -293,26 +293,26 @@ static void mmu_memory_cache_free(struct kvm_mmu_memory_cache *mc, void *obj) static struct kvm_pte_chain *mmu_alloc_pte_chain(struct kvm_vcpu *vcpu) { - return mmu_memory_cache_alloc(&vcpu->mmu_pte_chain_cache, + return mmu_memory_cache_alloc(&vcpu->kvm->mmu_pte_chain_cache, sizeof(struct kvm_pte_chain)); } static void mmu_free_pte_chain(struct kvm_vcpu *vcpu, struct kvm_pte_chain *pc) { - mmu_memory_cache_free(&vcpu->mmu_pte_chain_cache, pc); + mmu_memory_cache_free(&vcpu->kvm->mmu_pte_chain_cache, pc); } static struct kvm_rmap_desc *mmu_alloc_rmap_desc(struct kvm_vcpu *vcpu) { - return mmu_memory_cache_alloc(&vcpu->mmu_rmap_desc_cache, + return mmu_memory_cache_alloc(&vcpu->kvm->mmu_rmap_desc_cache, sizeof(struct kvm_rmap_desc)); } static void mmu_free_rmap_desc(struct kvm_vcpu *vcpu, struct kvm_rmap_desc *rd) { - mmu_memory_cache_free(&vcpu->mmu_rmap_desc_cache, rd); + mmu_memory_cache_free(&vcpu->kvm->mmu_rmap_desc_cache, rd); } /* @@ -471,8 +471,8 @@ static void kvm_mmu_free_page(struct kvm_vcpu *vcpu, { ASSERT(is_empty_shadow_page(page_head->spt)); list_del(&page_head->link); - mmu_memory_cache_free(&vcpu->mmu_page_cache, page_head->spt); - mmu_memory_cache_free(&vcpu->mmu_page_header_cache, page_head); + mmu_memory_cache_free(&vcpu->kvm->mmu_page_cache, page_head->spt); + mmu_memory_cache_free(&vcpu->kvm->mmu_page_header_cache, page_head); ++vcpu->kvm->n_free_mmu_pages; } @@ -489,9 +489,9 @@ static struct kvm_mmu_page *kvm_mmu_alloc_page(struct kvm_vcpu *vcpu, if (!vcpu->kvm->n_free_mmu_pages) return NULL; - page = 
mmu_memory_cache_alloc(&vcpu->mmu_page_header_cache, + page = mmu_memory_cache_alloc(&vcpu->kvm->mmu_page_header_cache, sizeof *page); - page->spt = mmu_memory_cache_alloc(&vcpu->mmu_page_cache, PAGE_SIZE); + page->spt = mmu_mem