[kvm-devel] [PATCH 1/2] Revert kvm: qemu: GET_CONFIGURATION fixes to allow OpenSolaris CD-ROM access
This reverts commit b64c20519d5826875679b6df85afebed27e1a9a8. Conflicts: qemu/hw/ide.c (keep uint64_t total_sectors) Signed-off-by: Carlo Marcelo Arenas Belon [EMAIL PROTECTED] --- qemu/hw/ide.c | 27 --- 1 files changed, 8 insertions(+), 19 deletions(-) diff --git a/qemu/hw/ide.c b/qemu/hw/ide.c index 90f2f2b..9a22db9 100644 --- a/qemu/hw/ide.c +++ b/qemu/hw/ide.c @@ -1640,7 +1640,6 @@ static void ide_atapi_cmd(IDEState *s) break; case GPCMD_GET_CONFIGURATION: { -uint32_t len; uint64_t total_sectors; /* only feature 0 is supported */ @@ -1649,27 +1648,17 @@ static void ide_atapi_cmd(IDEState *s) ASC_INV_FIELD_IN_CMD_PACKET); break; } -max_len = ube16_to_cpu(packet + 7); -bdrv_get_geometry(s-bs, total_sectors); memset(buf, 0, 32); -if (total_sectors) { -if (total_sectors 1433600) { -buf[7] = 0x10; /* DVD-ROM */ -} else { -buf[7] = 0x08; /* CD-ROM */ -} -} else { -buf[7] = 0x00; /* no current profile */ -} -buf[10] = 0x02 | 0x01; /* persistent and current */ -buf[11] = 0x08; /* size of profile list = 4 bytes per profile */ +bdrv_get_geometry(s-bs, total_sectors); +buf[3] = 16; +buf[7] = total_sectors = 1433600 ? 0x08 : 0x10; /* current profile */ +buf[10] = 0x10 | 0x1; +buf[11] = 0x08; /* size of profile list */ buf[13] = 0x10; /* DVD-ROM profile */ -buf[14] = buf[13] == buf[7]; /* (in)active */ +buf[14] = buf[7] == 0x10; /* (in)active */ buf[17] = 0x08; /* CD-ROM profile */ -buf[18] = buf[17] == buf[7]; /* (in)active */ -len = 8 + 4 + buf[11]; /* headers + size of profile list */ -cpu_to_ube32(buf, len - 4); /* data length */ -ide_atapi_cmd_reply(s, len, max_len); +buf[18] = buf[7] == 0x08; /* (in)active */ +ide_atapi_cmd_reply(s, 32, 32); break; } default: -- 1.5.3.7 - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] [PATCH 2/2] qemu: Multi-profile DVD-ROM support
This is version 2.2 of the original patch proposed to fix the MMC6 command for GET CONFIGURATION for multi profile CD/DVD support and that was originally committed as b64c20519d5826875679b6df85afebed27e1a9a8 cvs -q diff -up -r1.79 -r1.80 hw/ide.c Signed-off-by: Carlo Marcelo Arenas Belon [EMAIL PROTECTED] --- qemu/hw/ide.c | 123 +++-- 1 files changed, 110 insertions(+), 13 deletions(-) diff --git a/qemu/hw/ide.c b/qemu/hw/ide.c index 9a22db9..54496c2 100644 --- a/qemu/hw/ide.c +++ b/qemu/hw/ide.c @@ -1,5 +1,5 @@ /* - * QEMU IDE disk and CD-ROM Emulator + * QEMU IDE disk and CD/DVD-ROM Emulator * * Copyright (c) 2003 Fabrice Bellard * Copyright (c) 2006 Openedhand Ltd. @@ -284,6 +284,58 @@ * of MODE_SENSE_POWER_PAGE */ #define GPMODE_CDROM_PAGE 0x0d +/* + * Based on values from linux/cdrom.h but extending CD_MINS + * to the maximum common size allowed by the Orange's Book ATIP + * + * 90 and 99 min CDs are also available but using them as the + * upper limit reduces the effectiveness of the heuristic to + * detect DVDs burned to less than 25% of their maximum capacity + */ + +/* Some generally useful CD-ROM information */ +#define CD_MINS 80 /* max. minutes per CD */ +#define CD_SECS 60 /* seconds per minute */ +#define CD_FRAMES 75 /* frames per second */ +#define CD_FRAMESIZE2048 /* bytes per frame, cooked mode */ +#define CD_MAX_BYTES (CD_MINS * CD_SECS * CD_FRAMES * CD_FRAMESIZE) +#define CD_MAX_SECTORS (CD_MAX_BYTES / 512) + +/* + * The MMC values are not IDE specific and might need to be moved + * to a common header if they are also needed for the SCSI emulation + */ + +/* Profile list from MMC-6 revision 1 table 91 */ +#define MMC_PROFILE_NONE0x +#define MMC_PROFILE_CD_ROM 0x0008 +#define MMC_PROFILE_CD_R0x0009 +#define MMC_PROFILE_CD_RW 0x000A +#define MMC_PROFILE_DVD_ROM 0x0010 +#define MMC_PROFILE_DVD_R_SR0x0011 +#define MMC_PROFILE_DVD_RAM 0x0012 +#define MMC_PROFILE_DVD_RW_RO 0x0013 +#define MMC_PROFILE_DVD_RW_SR 0x0014 +#define MMC_PROFILE_DVD_R_DL_SR 0x0015 +#define MMC_PROFILE_DVD_R_DL_JR 0x0016 +#define MMC_PROFILE_DVD_RW_DL 0x0017 +#define MMC_PROFILE_DVD_DDR 0x0018 +#define MMC_PROFILE_DVD_PLUS_RW 0x001A +#define MMC_PROFILE_DVD_PLUS_R 0x001B +#define MMC_PROFILE_DVD_PLUS_RW_DL 0x002A +#define MMC_PROFILE_DVD_PLUS_R_DL 0x002B +#define MMC_PROFILE_BD_ROM 0x0040 +#define MMC_PROFILE_BD_R_SRM0x0041 +#define MMC_PROFILE_BD_R_RRM0x0042 +#define MMC_PROFILE_BD_RE 0x0043 +#define MMC_PROFILE_HDDVD_ROM 0x0050 +#define MMC_PROFILE_HDDVD_R 0x0051 +#define MMC_PROFILE_HDDVD_RAM 0x0052 +#define MMC_PROFILE_HDDVD_RW0x0053 +#define MMC_PROFILE_HDDVD_R_DL 0x0058 +#define MMC_PROFILE_HDDVD_RW_DL 0x005A +#define MMC_PROFILE_INVALID 0x + #define ATAPI_INT_REASON_CD 0x01 /* 0 = data transfer */ #define ATAPI_INT_REASON_IO 0x02 /* 1 = transfer to the host */ #define ATAPI_INT_REASON_REL0x04 @@ -1290,6 +1342,22 @@ static void ide_atapi_cmd_read(IDEState *s, int lba, int nb_sectors, } } +static inline uint8_t ide_atapi_set_profile(uint8_t *buf, uint8_t *index, +uint16_t profile) +{ +uint8_t *buf_profile = buf + 12; /* start of profiles */ + +buf_profile += ((*index) * 4); /* start of indexed profile */ +cpu_to_ube16 (buf_profile, profile); +buf_profile[2] = ((buf_profile[0] == buf[6]) (buf_profile[1] == buf[7])); + +/* each profile adds 4 bytes to the response */ +(*index)++; +buf[11] += 4; /* Additional Length */ + +return 4; +} + static void ide_atapi_cmd(IDEState *s) { const uint8_t *packet; @@ -1640,7 +1708,7 @@ static void ide_atapi_cmd(IDEState *s) break; case GPCMD_GET_CONFIGURATION: { -uint64_t total_sectors; +uint32_t len; /* only feature 0 is supported */ if (packet[2] != 0 || packet[3] != 0) { @@ -1648,17 +1716,46 @@ static void ide_atapi_cmd(IDEState *s) ASC_INV_FIELD_IN_CMD_PACKET); break; } -memset(buf, 0, 32); -bdrv_get_geometry(s-bs, total_sectors); -buf[3] = 16; -buf[7] = total_sectors = 1433600 ? 0x08 : 0x10; /* current profile */ -buf[10] = 0x10 | 0x1; -buf[11] = 0x08; /* size of profile list */ -buf[13] = 0x10; /* DVD-ROM profile */ -buf[14] = buf[7] == 0x10; /* (in)active */ -buf[17] = 0x08; /* CD-ROM profile */ -buf[18] =
Re: [kvm-devel] [PATCH] add acpi powerbutton support
On Mon, Jan 14, 2008 at 04:41:23AM -0800, Alexey Eremenko wrote: As far as I know, Qemu/KVM already had ACPI power button; It can be used via Qemu Monitor: system_shutdown command. Before 46b1a7377b55a3b6317b18fff64f1a80de7d3120 system_powerdown was a do { } while(0); on i386 and the FF pwrbtn wasn't enabled in the FADT. -- Guido - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] KVM swapping with mmu notifiers
Hi Andrea, path for kvm so I guess not worth optimizing in the short/mid term, but by optimizing the invalidate_page case we may halve the number of tlb flushes for some common case. I leave it for later, the swapping is heavily I/O bound anyway so a some more ipi in smp host shouldn't be very measurable (on UP host it makes no difference to flush multiple times in practice). Signed-off-by: Andrea Arcangeli [EMAIL PROTECTED] diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4086080..c527d7d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -18,6 +18,7 @@ config KVM tristate Kernel-based Virtual Machine (KVM) support depends on ARCH_SUPPORTS_KVM EXPERIMENTAL select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 324ff9a..103c270 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -532,6 +532,36 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) kvm_flush_remote_tlbs(kvm); } +static void unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte PT64_BASE_ADDR_MASK) PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn) +{ + unsigned long *rmapp; + u64 *spte, *curr_spte; + + spin_lock(kvm-mmu_lock); + gfn = unalias_gfn(kvm, gfn); + rmapp = gfn_to_rmap(kvm, gfn); The alias and memslot maps are protected only by mmap_sem, so you should make kvm_set_memory_region/set_memory_alias grab the mmu spinlock in addition to mmap_sem in write mode. kvm_mmu_zap_all() grabs the mmu lock.. that should probably move up into the caller. + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte PT_PRESENT_MASK)); + rmap_printk(rmap_swap_page: spte %p %llx\n, spte, *spte); + curr_spte = spte; + spte = rmap_next(kvm, rmapp, spte); + unmap_spte(kvm, curr_spte); + } + spin_unlock(kvm-mmu_lock); +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a90403..e9a3f6e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3159,6 +3159,36 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu-arch.pio_data); } +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + return container_of(mn, struct kvm, mmu_notifier); +} + +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + gfn_t gfn = hva_to_gfn(kvm, address); + BUG_ON(mm != kvm-mm); + if (gfn == -1UL) + return; + kvm_rmap_unmap_gfn(kvm, gfn); And then you also need to cover hva_to_gfn() to happen under the lock. +} + +void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, +struct mm_struct *mm, +unsigned long start, unsigned long end) +{ + for (; start end; start += PAGE_SIZE) + kvm_mmu_notifier_invalidate_page(mn, mm, start); +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_range = kvm_mmu_notifier_invalidate_range, + .invalidate_page= kvm_mmu_notifier_invalidate_page, +}; + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); @@ -3167,6 +3197,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(kvm-arch.active_mmu_pages); + kvm-mmu_notifier.ops = kvm_mmu_notifier_ops; return kvm; } diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h index d6db0de..feacd77 100644 --- a/include/asm-x86/kvm_host.h +++ b/include/asm-x86/kvm_host.h @@ -404,6 +404,7 @@ int kvm_mmu_create(struct kvm_vcpu *vcpu); int kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte); +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn); int kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot); void kvm_mmu_zap_all(struct kvm *kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2714068..85da7fa 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -117,6 +117,7 @@ struct kvm { struct kvm_io_bus pio_bus; struct kvm_vm_stat stat;
Re: [kvm-devel] KVM swapping with mmu notifiers
On Mon, Jan 14, 2008 at 11:45:39AM -0200, Marcelo Tosatti wrote: The alias and memslot maps are protected only by mmap_sem, so you yes, they are already protected and furthermore in write mode. should make kvm_set_memory_region/set_memory_alias grab the mmu spinlock in addition to mmap_sem in write mode. The mmu notifiers already hold the mmap_sem in read mode so I don't see why I should add the mmu_lock around memslots. The mmu_lock AFAICS is only needed to serialize with other vcpu fautls when updating the sptes and I already take it there. And then you also need to cover hva_to_gfn() to happen under the lock. hva_to_gfn only requires the mmap_sem in read mode and that's already taken implicitly before the mmu notifiers are called. - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] KVM swapping with mmu notifiers
Marcelo Tosatti wrote: +static void unmap_spte(struct kvm *kvm, u64 *spte) +{ +struct page *page = pfn_to_page((*spte PT64_BASE_ADDR_MASK) PAGE_SHIFT); +get_page(page); +rmap_remove(kvm, spte); +set_shadow_pte(spte, shadow_trap_nonpresent_pte); +kvm_flush_remote_tlbs(kvm); +__free_page(page); +} + +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn) +{ +unsigned long *rmapp; +u64 *spte, *curr_spte; + +spin_lock(kvm-mmu_lock); +gfn = unalias_gfn(kvm, gfn); +rmapp = gfn_to_rmap(kvm, gfn); The alias and memslot maps are protected only by mmap_sem, so you should make kvm_set_memory_region/set_memory_alias grab the mmu spinlock in addition to mmap_sem in write mode. kvm_mmu_zap_all() grabs the mmu lock.. that should probably move up into the caller. Aren't mmu notifiers called with mmap_sem held for read? Maybe not from the swap path? -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] KVM swapping with mmu notifiers
On Mon, Jan 14, 2008 at 04:09:03PM +0200, Avi Kivity wrote: Marcelo Tosatti wrote: +static void unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte PT64_BASE_ADDR_MASK) PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +void kvm_rmap_unmap_gfn(struct kvm *kvm, gfn_t gfn) +{ + unsigned long *rmapp; + u64 *spte, *curr_spte; + + spin_lock(kvm-mmu_lock); + gfn = unalias_gfn(kvm, gfn); + rmapp = gfn_to_rmap(kvm, gfn); The alias and memslot maps are protected only by mmap_sem, so you should make kvm_set_memory_region/set_memory_alias grab the mmu spinlock in addition to mmap_sem in write mode. kvm_mmu_zap_all() grabs the mmu lock.. that should probably move up into the caller. Aren't mmu notifiers called with mmap_sem held for read? Maybe not from the swap path? Good point, the swap path isn't covered by the mmap_sem, so Marcelo's right I need to fixup the locking a bit. - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] KVM swapping with mmu notifiers
Andrea Arcangeli wrote: Aren't mmu notifiers called with mmap_sem held for read? Maybe not from the swap path? Good point, the swap path isn't covered by the mmap_sem, so Marcelo's right I need to fixup the locking a bit. One idea I had at a time was to add -lock() and -unlock() callbacks to mmu notifiers that would be called in a sleepy context. But that seems heavy handed. Maybe it can be fixed in some clever way with rcu or with a rwlock around the memory slot map. -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [RFC] fix VMX TSC synchronicity
Hi Avi, On Sun, Jan 13, 2008 at 02:19:29PM +0200, Avi Kivity wrote: Marcelo Tosatti wrote: The boot TSC sync check is failing on recent Linux SMP guests on TSC stable hosts. What about tsc unstable hosts? If your patch convinces the guest its tsc is table, while the host tsc is not, then it may cause confusion later on. The adjustment to zero won't fool the guest because it assumes that the TSC's are synchronized. It will simply set the guest TSC to zero on all VCPUs based on the time VCPU0 was initialized. That is, setting -(vcpu[0].first_tsc) on all VCPU's will not correct any synchronization problem. Following patch attempts to address the problems, which are: 1) APIC_DM_STARTUP, which is only called for vcpu's other than vcpu0, will trigger -vcpu_reset(), setting the TSC to 0. Fix that by moving the guest_write_tsc(0) to vcpu_load. 2) vcpu's are initialized at different times by QEMU (vcpu0 init happens way earlier than the rest). Fix that by initializing the TSC of vcpu's 0 with reference to vcpu0 init tsc value. This way TSC synchronization is kept (apparently Xen does something similar). 3) The code which adjusts the TSC of a VCPU on physical CPU switch is meant to guarantee that the guest sees a monotonically increasing value. However there is a large gap, in terms of clocks, between the time which the source CPU TSC is read and the time the destination CPU TSC is read. So move those two reads to happen at vcpu_clear. I believe that 3) is the reason for the -no-kvm-irqchip problems reported by Avi on RHEL5, with kernels 2.6.21 (where vcpu-cpu pinning would fix the problem). Unfortunately I could reproduce that problem. 4-way guest with constant tick at 250Hz now reliably sees the TSC's synchronized, and idle guest CPU consumption is reduced by 50% (3-4% instead of 8%, the latter with acpi_pm_good boot parameter). diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4741806..e1c8cf4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -47,6 +47,8 @@ struct vcpu_vmx { struct kvm_vcpu vcpu; int launched; u8fail; +u64 first_tsc; +u64 tsc_this; u32 idt_vectoring_info; struct kvm_msr_entry *guest_msrs; struct kvm_msr_entry *host_msrs; @@ -254,6 +256,7 @@ static void vcpu_clear(struct vcpu_vmx *vmx) if (vmx-vcpu.cpu == -1) return; smp_call_function_single(vmx-vcpu.cpu, __vcpu_clear, vmx, 0, 1); +rdtscll(vmx-tsc_this); vmx-launched = 0; } @@ -480,6 +483,8 @@ static void vmx_load_host_state(struct vcpu_vmx *vmx) reload_host_efer(vmx); } +static void guest_write_tsc(u64 host_tsc, u64 guest_tsc); + /* * Switches to specified vcpu, until a matching vcpu_put(), but assumes * vcpu mutex is already taken. @@ -488,7 +493,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); u64 phys_addr = __pa(vmx-vmcs); -u64 tsc_this, delta; +u64 delta; if (vcpu-cpu != cpu) { vcpu_clear(vmx); @@ -511,6 +516,19 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) struct descriptor_table dt; unsigned long sysenter_esp; +if (unlikely(vcpu-cpu == -1)) { This can happen after migration, I believe. Right now the destination host will do -vcpu_reset() on all VCPU's on startup... so its already broken. This patch is not introducing any issues, just changing where it happens :) Perhaps fixing migration should be the subject of a separate patch? +rdtscll(vcpu-arch.host_tsc); +vmx-tsc_this = vcpu-arch.host_tsc; +if (vcpu-vcpu_id == 0) { +guest_write_tsc(vcpu-arch.host_tsc, 0); +vmx-first_tsc = vcpu-arch.host_tsc; +} else { +struct vcpu_vmx *cpu0; +cpu0 = to_vmx(vcpu-kvm-vcpus[0]); +guest_write_tsc(cpu0-first_tsc, 0); +} +} + Depending on vcpu_id == 0 can cause problems (for example, if vcpu 0 is somehow not the first to run). But QEMU will always run kvm_create() first (which does kvm_create_vcpu(0)), then start the other threads later. So I don't see how that can happen. We might initialize the tsc base on vm creation, and if the host tsc is synced, then the guest tsc should also be. vcpu-cpu = cpu; /* * Linux uses per-cpu TSS and GDT, so set these when switching @@ -526,8 +544,7 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* * Make sure the time stamp counter is monotonous. */ -
Re: [kvm-devel] [PATCH] fix cpuid function 4
On Mon, Jan 14, 2008 at 02:49:31PM +0100, Alexander Graf wrote: Hi, Currently CPUID function 4 is broken. This function's values rely on the value of ECX. To solve the issue cleanly, there is already a new API for cpuid settings, which is not used yet. Using the current interface, the function 4 can be easily passed through, by giving multiple function 4 outputs and increasing the index-identifier on the fly. This does not break compatibility. This fix is really important for Mac OS X, as it requires cache information. Please also see my previous patches for Mac OS X (or rather core duo target) compatibility. Regards, Alex diff --git a/kernel/x86.c b/kernel/x86.c index b55c177..73312e9 100644 --- a/kernel/x86.c +++ b/kernel/x86.c @@ -783,7 +783,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) { - int r, i; + int r, i, n = 0; struct kvm_cpuid_entry *cpuid_entries; r = -E2BIG; @@ -803,8 +803,17 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu-arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; vcpu-arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; vcpu-arch.cpuid_entries[i].edx = cpuid_entries[i].edx; - vcpu-arch.cpuid_entries[i].index = 0; - vcpu-arch.cpuid_entries[i].flags = 0; +switch(vcpu-arch.cpuid_entries[i].function) { +case 4: +vcpu-arch.cpuid_entries[i].index = n; +vcpu-arch.cpuid_entries[i].flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +n++; +break; +default: +vcpu-arch.cpuid_entries[i].index = 0; +vcpu-arch.cpuid_entries[i].flags = 0; +break; +} I will not mention the whitespace damage here :-). Instead, I'd ask you to review, comment, and even try, the patch that I posted here not long ago, exposing all safe host cpuid functions to guests. Thanks, Dan. diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c index 2fa8146..e9a8113 100644 --- a/libkvm/libkvm-x86.c +++ b/libkvm/libkvm-x86.c @@ -485,20 +485,109 @@ __u64 kvm_get_cr8(kvm_context_t kvm, int vcpu) } int kvm_setup_cpuid(kvm_context_t kvm, int vcpu, int nent, - struct kvm_cpuid_entry *entries) + struct kvm_cpuid_entry2 *entries) { - struct kvm_cpuid *cpuid; - int r; + int r = -ENOSYS; +#ifdef KVM_CAP_EXT_CPUID + r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_EXT_CPUID); + if (r = 0) +#endif + { /* kernel has no KVM_SET_CPUID2 */ + int i; + struct kvm_cpuid *cpuid; + + cpuid = malloc(sizeof(*cpuid) + + nent * sizeof(struct kvm_cpuid_entry)); + if (!cpuid) + return -ENOMEM; + + cpuid-nent = nent; + for (i = 0; i nent; ++i) { + cpuid-entries[i].function = entries[i].function; + cpuid-entries[i].eax = entries[i].eax; + cpuid-entries[i].ebx = entries[i].ebx; + cpuid-entries[i].ecx = entries[i].ecx; + cpuid-entries[i].edx = entries[i].edx; + } + r = ioctl(kvm-vcpu_fd[vcpu], KVM_SET_CPUID, cpuid); + if (r) + r = -errno; + + free(cpuid); + } +#ifdef KVM_CAP_EXT_CPUID + else { + struct kvm_cpuid2 *cpuid; cpuid = malloc(sizeof(*cpuid) + nent * sizeof(*entries)); if (!cpuid) return -ENOMEM; cpuid-nent = nent; memcpy(cpuid-entries, entries, nent * sizeof(*entries)); - r = ioctl(kvm-vcpu_fd[vcpu], KVM_SET_CPUID, cpuid); + r = ioctl(kvm-vcpu_fd[vcpu], KVM_SET_CPUID2, cpuid); + if (r) + r = -errno; + + free(cpuid); + } +#endif + return r; +} + +int kvm_get_supported_cpuid(kvm_context_t kvm, int *nent, + struct kvm_cpuid_entry2 *entries) +{ + struct kvm_cpuid2 *cpuid; + int r = -ENOSYS; + +#ifdef KVM_CAP_EXT_CPUID + r = ioctl(kvm-fd, KVM_CHECK_EXTENSION, KVM_CAP_EXT_CPUID); + if (r = 0) + return -ENOSYS; + + cpuid = malloc(sizeof(*cpuid) + *nent * sizeof(*entries)); + if (!cpuid) + return -ENOMEM; + cpuid-nent = *nent; + r = ioctl(kvm-vm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); + if (r) + r = -errno; + else { + memcpy(entries, cpuid-entries, *nent * sizeof(*entries)); + *nent = cpuid-nent; + } free(cpuid);
Re: [kvm-devel] [kvm-ppc-devel] RFC: MMIO endianness flag
I hope I've explained in the other mail I just sent why Qemu assuming little-endian for everything is not OK. One other important clarification: kvm_run-mmio.is_bigendian is about *one* *particular* *MMIO* *access*. It has only coincidental relationship to the endianness mode of the guest. -- Hollis Blanchard IBM Linux Technology Center On Mon, 2008-01-14 at 13:42 +0800, Xu, Anthony wrote: kvm_run-mmio.is_bigendian = vcpu-arch.some_register From your example code, I can know is_bigendian indicate whether guest is in bigendian mode when accessing MMIO. Qemu is responsible for handling MMIO emulation, Qemus always assume it is running on littleendian mode. So if guest accesses MMIO in bigendian mode, kvm need to transform it to littleendian before delivering this MMIO request to Qemu. This works for IA64 side. Thanks, - Anthony Hollis Blanchard wrote: We're just talking about a flag in the kvm_run.mmio structure, so it does not represent the state of any software, guest or host, other than that single MMIO access. This flag is only used for communication between host kernel and host userland, so the host kernel is always responsible for setting it. is_bigendian is just one more necessary piece of information for MMIO emulation. kvm_run already tells you that you are loading 4 bytes from address 0. What you don't know today is if byte 0 is the least significant or most significant byte. If is_bigendian is set, you know that byte 0 is the MSB and byte 3 is the LSB. If not, the opposite is true. In the simplest case, IA64's in-kernel MMIO emulation code could look something like: kvm_run-mmio.phys_addr = addr; kvm_run-mmio.len = len; ... kvm_run-mmio.is_bigendian = vcpu-arch.some_register BIGENDIAN; If IA64 has reverse-endian load/store instructions like PowerPC, then you would also need to consider the particular instruction used as well as the guest state. On Fri, 2008-01-11 at 10:02 +0800, Xu, Anthony wrote: Hi all, That's a good start to consider BE. Yes, IA64 support BE and LE. I have below comments. What does is_bigendian mean? Host is runing with BE or guest is running with BE. Who will set is_bigendian? For supporing BE, We need to consider host BE and guest BE. For IA64, most OS is running with LE, and Application can run with BE or LE, For example, Qemu can run with BE or LE. IMHO, we need two flags, host_is_bigendian indicates Qemu is running with BE Guest_is_bigendian indecates the guest application who is accessing MMIO Is running with LE. - Anthony Hollis Blanchard wrote: On Thu, 2008-01-10 at 17:28 +0200, Avi Kivity wrote: I'll apply that patch (with a #ifdef CONFIG_PPC so other archs don't use it by mistake). I don't think that's the right ifdef. For example, I believe IA64 can run in BE mode and so will have the same issue, and there are certainly other architectures (less relevant to the current code) that definitely are in the same situation. We need to plumb this through to the libkvm users anyways. Take a look at the patch below and tell me if you think it's not the right approach. x86 simply won't consider 'is_bigendian'. I spent a lot of time on this, and it's by far the cleanest solution I could come up with. diff --git a/libkvm/config-i386.mak b/libkvm/config-i386.mak --- a/libkvm/config-i386.mak +++ b/libkvm/config-i386.mak @@ -2,5 +2,6 @@ LIBDIR := /lib LIBDIR := /lib CFLAGS += -m32 CFLAGS += -D__i386__ +CFLAGS += -DARCH_MMIO_ENDIAN_LITTLE libkvm-$(ARCH)-objs := libkvm-x86.o diff --git a/libkvm/config-ia64.mak b/libkvm/config-ia64.mak --- a/libkvm/config-ia64.mak +++ b/libkvm/config-ia64.mak @@ -1,5 +1,6 @@ LIBDIR := /lib CFLAGS += -D__ia64__ +CFLAGS += -DARCH_MMIO_ENDIAN_LITTLE -DARCH_MMIO_ENDIAN_BIG libkvm-$(ARCH)-objs := libkvm-ia64.o diff --git a/libkvm/config-powerpc.mak b/libkvm/config-powerpc.mak --- a/libkvm/config-powerpc.mak +++ b/libkvm/config-powerpc.mak @@ -2,5 +2,6 @@ LIBDIR := /lib LIBDIR := /lib CFLAGS += -m32 CFLAGS += -D__powerpc__ +CFLAGS += -DARCH_MMIO_ENDIAN_BIG -DARCH_MMIO_ENDIAN_LITTLE libkvm-$(ARCH)-objs := libkvm-powerpc.o diff --git a/libkvm/config-x86_64.mak b/libkvm/config-x86_64.mak --- a/libkvm/config-x86_64.mak +++ b/libkvm/config-x86_64.mak @@ -2,5 +2,6 @@ LIBDIR := /lib64 LIBDIR := /lib64 CFLAGS += -m64 CFLAGS += -D__x86_64__ +CFLAGS += -DARCH_MMIO_ENDIAN_LITTLE libkvm-$(ARCH)-objs := libkvm-x86.o diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c --- a/libkvm/libkvm.c +++ b/libkvm/libkvm.c @@ -774,21 +774,56 @@ int kvm_set_sregs(kvm_context_t kvm, int return ioctl(kvm-vcpu_fd[vcpu], KVM_SET_SREGS, sregs); } +#ifdef ARCH_MMIO_ENDIAN_BIG +static int
Re: [kvm-devel] [kvm-ppc-devel] RFC: MMIO endianness flag
On Sun, 2008-01-13 at 11:42 +0200, Avi Kivity wrote: Do we really need to propagate endianness all the way to the user? Perhaps libkvm could call the regular mmio functions and do the transformation itself. Or maybe even the kernel can do this by itself? The kernel *already* does this by itself, and I'm attempting to explain why that is not sufficient. My point is precisely that the endianness information must be propagated to the user, otherwise the user may not have all the information it needs to emulate it. Here is the concrete example: * guest writes to MMIO * KVM passes MMIO information (physical address, number of bytes, value) to qemu * Qemu knows from the address that this access is for a passthough device, a special case the administrator has pre-configured * Qemu does mmap(/dev/mem), and writes length bytes of value at offset address. Now here's the catch: what endianness does qemu use when doing the write? If qemu only does BE, then a LE access from the guest will be byte-reversed when presented to the real hardware. -- Hollis Blanchard IBM Linux Technology Center - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH] add acpi powerbutton support
Guido Guenther wrote: - backout the madt interrupt source override changes Should these stay, but with active low instead of active high? They can go away, since active low is the default. For reference, I'm attaching the patches I'm using. -- error compiling committee.c: too many arguments to function From 736880c1e550f84778e0cd13ae7dd27988dee902 Mon Sep 17 00:00:00 2001 From: Avi Kivity [EMAIL PROTECTED] Date: Mon, 14 Jan 2008 17:34:11 +0200 Subject: [PATCH] kvm: bios: pci interrupts are active low Signed-off-by: Avi Kivity [EMAIL PROTECTED] --- bios/acpi-dsdt.dsl | 16 1 files changed, 8 insertions(+), 8 deletions(-) diff --git a/bios/acpi-dsdt.dsl b/bios/acpi-dsdt.dsl index df255ce..92fd126 100755 --- a/bios/acpi-dsdt.dsl +++ b/bios/acpi-dsdt.dsl @@ -382,7 +382,7 @@ DefinitionBlock ( Name(_HID, EISAID(PNP0C0F)) // PCI interrupt link Name(_UID, 1) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 9, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -402,7 +402,7 @@ DefinitionBlock ( { Name (PRR0, ResourceTemplate () { -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) {1} }) CreateDWordField (PRR0, 0x05, TMP) @@ -427,7 +427,7 @@ DefinitionBlock ( Name(_HID, EISAID(PNP0C0F)) // PCI interrupt link Name(_UID, 2) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 9, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -447,7 +447,7 @@ DefinitionBlock ( { Name (PRR0, ResourceTemplate () { -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) {1} }) CreateDWordField (PRR0, 0x05, TMP) @@ -472,7 +472,7 @@ DefinitionBlock ( Name(_HID, EISAID(PNP0C0F)) // PCI interrupt link Name(_UID, 3) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 9, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -492,7 +492,7 @@ DefinitionBlock ( { Name (PRR0, ResourceTemplate () { -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) {1} }) CreateDWordField (PRR0, 0x05, TMP) @@ -517,7 +517,7 @@ DefinitionBlock ( Name(_HID, EISAID(PNP0C0F)) // PCI interrupt link Name(_UID, 4) Name(_PRS, ResourceTemplate(){ -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) { 5, 9, 10, 11 } }) Method (_STA, 0, NotSerialized) @@ -537,7 +537,7 @@ DefinitionBlock ( { Name (PRR0, ResourceTemplate () { -Interrupt (, Level, ActiveHigh, Shared) +Interrupt (, Level, ActiveLow, Shared) {1} }) CreateDWordField (PRR0, 0x05, TMP) -- 1.5.3.7 From 96ecf8e23154baa8020c58316cc013b8fa28689c Mon Sep 17 00:00:00 2001 From: Avi Kivity [EMAIL PROTECTED] Date: Mon, 14 Jan 2008 17:47:35 +0200 Subject: [PATCH] kvm: bios: don't advertise the pci interrupts as active high Signed-off-by: Avi Kivity [EMAIL PROTECTED] --- bios/rombios32.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/bios/rombios32.c b/bios/rombios32.c index 967c119..ebfbb24 100755 --- a/bios/rombios32.c +++ b/bios/rombios32.c @@ -1369,7 +1369,7 @@ void acpi_bios_init(void) intsrcovr = (struct madt_intsrcovr*)(io_apic + 1); for ( i = 0; i 16; i++ ) { -if ( PCI_ISA_IRQ_MASK (1U i) ) { +if ( 0 (PCI_ISA_IRQ_MASK (1U i) )) { memset(intsrcovr, 0, sizeof(*intsrcovr)); intsrcovr-type = APIC_XRUPT_OVERRIDE; intsrcovr-length = sizeof(*intsrcovr); -- 1.5.3.7 From 5a5564a0998f42c5394090167d0dbf1e111dde10 Mon Sep 17 00:00:00 2001 From: Avi Kivity [EMAIL
Re: [kvm-devel] Setting hardware breakpoints in guest OS
[EMAIL PROTECTED] wrote: While we tried to make debugging inside the guest work, this was never really tested, so it's likely broken. I'll try to look at what it will take to make it work; I don't think there's much needed. Thinking a bit, it may well be broken only when using the external module. If you use a distro-provided kvm (or compile your own kernel with kvm included) then it might work. Then again, distro provided kvms tend to be old and therefore slow. That sounds encouraging -- I had imagined there might be some impossibility factor in sharing something like hardware breakpoints between host and guest. Both vmx and svm fully allow virtualizing the hardware breakpoints (and even things like last branch recording). For now I'm simply sticking to QEMU+kqemu when I expect deliberate trickiness or need to do hard-breakpoint debugging, and QEMU/KVM (which is up to 50% faster when doing Windows software builds on my PC, nice!) when I don't care. I haven't had any problems loading and using the kvm drivers and kqemu at the same time, and I have assumed that there ought to be no issues in doing so, since they work quite differently and (from my very dangerously limited understanding) ought not to be competing for any mutually exclusive hardware resources. Is that a reasonable assumption? I believe so. I know VirtualBox and VMware have problems coexisting with kvm (since they tend to switch to real mode which is forbidden by vmx), but if kqemu works, then there shouldn't be any hidden problems. What hardware are you using? If you have both AMD and Intel hardware, you might have better luck switching, since this is very subarch dependent. Intel Core Duo (T2400 @ 1.83GHz according to /proc/cpuinfo), running 32-bit Linux 2.6.21.5 using KVM drivers built from the kvm-59 sourceball. Sorry, I don't have other vendors or CPU bitnesses to test on. PS: When I build KVM out of the box, I get a qemu binary called qemu-system-x86_64, though I have a 32-bit CPU and a 32-bit OS. Forgive my ignorance on this, but...why does the name of the binary imply a 64-bit flavour? The qemu binary is 32-bit, but is capable of running a 64-bit guest if you have a 64-bit cpu and a 64-bit kernel (still retaining 32-bit userspace). You can generate a 32-bit only binary, but that does not have any advantages over the 64-bit capable binary. Yes, it confuses me too. -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] KVM swapping with mmu notifiers
On Mon, Jan 14, 2008 at 05:43:58PM +0200, Avi Kivity wrote: heavy handed. Maybe it can be fixed in some clever way with rcu or with a rwlock around the memory slot map. Ok, first the alias looked superflous so I just dropped it (the whole point of the alias is to never call get_user_pages in the aliased hva, so the notifiers should never trigger in those alias gfn ranges). And the code was doing hva-gfn-rmap, while I'm now doing hva-rmap directly to avoid two loops over the memslot instead of only one. To serialize hva_to_rmapp mmap_sem is taken (in any mode) or the mmu_lock is taken, and it's enough to serialize the insertion with the mmu_lock and setting userspace_addr to 0 before insertion if the buffer isn't allocated by userland. Setting userspace_addr atomically will make the memslot visible. This is a new version of the kvm.git patch. Please Marcelo let me know if you see more problems, thanks a lot for the review! Signed-off-by: Andrea Arcangeli [EMAIL PROTECTED] diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4086080..c527d7d 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -18,6 +18,7 @@ config KVM tristate Kernel-based Virtual Machine (KVM) support depends on ARCH_SUPPORTS_KVM EXPERIMENTAL select PREEMPT_NOTIFIERS + select MMU_NOTIFIER select ANON_INODES ---help--- Support hosting fully virtualized guest machines using hardware diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 324ff9a..189f3e1 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -532,6 +532,38 @@ static void rmap_write_protect(struct kvm *kvm, u64 gfn) kvm_flush_remote_tlbs(kvm); } +static void unmap_spte(struct kvm *kvm, u64 *spte) +{ + struct page *page = pfn_to_page((*spte PT64_BASE_ADDR_MASK) PAGE_SHIFT); + get_page(page); + rmap_remove(kvm, spte); + set_shadow_pte(spte, shadow_trap_nonpresent_pte); + kvm_flush_remote_tlbs(kvm); + __free_page(page); +} + +void kvm_rmap_unmap_hva(struct kvm *kvm, unsigned long hva) +{ + unsigned long *rmapp; + u64 *spte, *curr_spte; + + spin_lock(kvm-mmu_lock); + rmapp = kvm_hva_to_rmapp(kvm, hva); + if (!rmapp) + goto out_unlock; + + spte = rmap_next(kvm, rmapp, NULL); + while (spte) { + BUG_ON(!(*spte PT_PRESENT_MASK)); + rmap_printk(kvm_rmap_unmap_hva: spte %p %llx\n, spte, *spte); + curr_spte = spte; + spte = rmap_next(kvm, rmapp, spte); + unmap_spte(kvm, curr_spte); + } +out_unlock: + spin_unlock(kvm-mmu_lock); +} + #ifdef MMU_DEBUG static int is_empty_shadow_page(u64 *spt) { diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8a90403..271ce12 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3159,6 +3159,33 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) free_page((unsigned long)vcpu-arch.pio_data); } +static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) +{ + return container_of(mn, struct kvm, mmu_notifier); +} + +void kvm_mmu_notifier_invalidate_page(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long address) +{ + struct kvm *kvm = mmu_notifier_to_kvm(mn); + BUG_ON(mm != kvm-mm); + kvm_rmap_unmap_hva(kvm, address); +} + +void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, + struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + for (; start end; start += PAGE_SIZE) + kvm_mmu_notifier_invalidate_page(mn, mm, start); +} + +static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { + .invalidate_range = kvm_mmu_notifier_invalidate_range, + .invalidate_page= kvm_mmu_notifier_invalidate_page, +}; + struct kvm *kvm_arch_create_vm(void) { struct kvm *kvm = kzalloc(sizeof(struct kvm), GFP_KERNEL); @@ -3167,6 +3194,7 @@ struct kvm *kvm_arch_create_vm(void) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(kvm-arch.active_mmu_pages); + kvm-mmu_notifier.ops = kvm_mmu_notifier_ops; return kvm; } @@ -3219,14 +3247,20 @@ int kvm_arch_set_memory_region(struct kvm *kvm, */ if (!user_alloc) { if (npages !old.rmap) { - memslot-userspace_addr = do_mmap(NULL, 0, -npages * PAGE_SIZE, -PROT_READ | PROT_WRITE, -MAP_SHARED | MAP_ANONYMOUS, -0); - - if (IS_ERR((void *)memslot-userspace_addr)) - return PTR_ERR((void *)memslot-userspace_addr);
Re: [kvm-devel] [kvm-ppc-devel] RFC: MMIO endianness flag
Hollis Blanchard wrote: On Sun, 2008-01-13 at 11:42 +0200, Avi Kivity wrote: Do we really need to propagate endianness all the way to the user? Perhaps libkvm could call the regular mmio functions and do the transformation itself. Or maybe even the kernel can do this by itself? The kernel *already* does this by itself, and I'm attempting to explain why that is not sufficient. My point is precisely that the endianness information must be propagated to the user, otherwise the user may not have all the information it needs to emulate it. Here is the concrete example: * guest writes to MMIO * KVM passes MMIO information (physical address, number of bytes, value) to qemu * Qemu knows from the address that this access is for a passthough device, a special case the administrator has pre-configured * Qemu does mmap(/dev/mem), and writes length bytes of value at offset address. Now here's the catch: what endianness does qemu use when doing the write? If qemu only does BE, then a LE access from the guest will be byte-reversed when presented to the real hardware. Right, I forgot that bit when replying. So, the data is always in natural endianness? If so, we can add an endianness indicator to the mmio callbacks, which qemu will mostly ignore, and only examine it on passthrough. btw, isn't passthrough better handled through the tlb? i.e. actually let the guest access the specially-configured memory? You can have qemu mmap /dev/mem and install it as a memslot, and things should work, no? (well, you might need to set some cachablility flag or other). -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
[kvm-devel] only for better reading
hi, this patch is only for better reading. flo --- x86.c 2008-01-14 18:05:07.0 +0100 +++ changed/x86.c 2008-01-14 18:06:10.0 +0100 @@ -407,10 +407,15 @@ * capabilities of the host cpu. */ static u32 msrs_to_save[] = { - MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP, + MSR_IA32_SYSENTER_CS, + MSR_IA32_SYSENTER_ESP, + MSR_IA32_SYSENTER_EIP, MSR_K6_STAR, #ifdef CONFIG_X86_64 - MSR_CSTAR, MSR_KERNEL_GS_BASE, MSR_SYSCALL_MASK, MSR_LSTAR, + MSR_CSTAR, + MSR_KERNEL_GS_BASE, + MSR_SYSCALL_MASK, + MSR_LSTAR, #endif MSR_IA32_TIME_STAMP_COUNTER, }; - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] mmu notifiers
On Sat, 12 Jan 2008, Avi Kivity wrote: Two kvm instances mmap() the file (from anywhere) into the guest address space. That memory is shared, and will be backed by the same page structs at the same offset. Duh. Impossible. Two instances of Linux cannot share page structs. So how are you doing this? Or is this just an idea? - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH] mmu notifiers #v2
On Mon, 2008-01-14 at 12:02 -0800, Christoph Lameter wrote: On Sun, 13 Jan 2008, Andrea Arcangeli wrote: About the locking perhaps I'm underestimating it, but by following the TLB flushing analogy, by simply clearing the shadow ptes (with kvm mmu_lock spinlock to avoid racing with other vcpu spte accesses of course) and flushing the shadow-pte after clearing the main linux pte, it should be enough to serialize against shadow-pte page faults that would call into get_user_pages. Flushing the host TLB before or after the shadow-ptes shouldn't matter. Hmmm... In most of the callsites we hold a writelock on mmap_sem right? Not in unmap_mapping_range() afaik. Comments welcome... especially from SGI/IBM/Quadrics and all other potential users of this functionality. There are also certain details I'm uncertain about, like passing 'mm' to the lowlevel methods, my KVM usage of the invalidate_page() notifier for example only uses 'mm' for a BUG_ON for example: Passing mm is fine as long as mmap_sem is held. Passing mm is always a good idea, regardless of the mmap_sem, it can be useful for lots of other things :-) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -86,6 +86,7 @@ do { \ pte_t __pte;\ __pte = ptep_get_and_clear((__vma)-vm_mm, __address, __ptep); \ flush_tlb_page(__vma, __address); \ + mmu_notifier(invalidate_page, (__vma)-vm_mm, __address); \ __pte; \ }) #endif Hmmm... this is ptep_clear_flush? What about the other uses of flush_tlb_page in asm-generic/pgtable.h and related uses in arch code? (would help if your patches would mention the function name in the diff headers) Note that last I looked, a lot of these were stale. Might be time to resume my spring/summer cleaning of page table accessors... +#define mmu_notifier(function, mm, args...) \ + do {\ + struct mmu_notifier *__mn; \ + struct hlist_node *__n; \ + \ + hlist_for_each_entry(__mn, __n, (mm)-mmu_notifier, hlist) \ + if (__mn-ops-function)\ + __mn-ops-function(__mn, mm, args);\ + } while (0) Does this have to be inline? ptep_clear_flush will become quite big -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to [EMAIL PROTECTED] For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: a href=mailto:[EMAIL PROTECTED] [EMAIL PROTECTED] /a - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] mmu notifiers
Christoph Lameter wrote: On Sat, 12 Jan 2008, Avi Kivity wrote: Two kvm instances mmap() the file (from anywhere) into the guest address space. That memory is shared, and will be backed by the same page structs at the same offset. Duh. Impossible. Two instances of Linux cannot share page structs. So how are you doing this? Or is this just an idea? I was describing one Linux host running two guest instances. The page structs are in the host, so they are shared by mmap(). kvm userspace is just an ordinary host process, it can mmap() any file it likes and then assign that virtual memory range to the guest (as guest physical memory). -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] [PATCH] fix cpuid function 4
Dan Kenigsberg wrote: On Mon, Jan 14, 2008 at 02:49:31PM +0100, Alexander Graf wrote: Hi, Currently CPUID function 4 is broken. This function's values rely on the value of ECX. To solve the issue cleanly, there is already a new API for cpuid settings, which is not used yet. Using the current interface, the function 4 can be easily passed through, by giving multiple function 4 outputs and increasing the index-identifier on the fly. This does not break compatibility. This fix is really important for Mac OS X, as it requires cache information. Please also see my previous patches for Mac OS X (or rather core duo target) compatibility. Regards, Alex diff --git a/kernel/x86.c b/kernel/x86.c index b55c177..73312e9 100644 --- a/kernel/x86.c +++ b/kernel/x86.c @@ -783,7 +783,7 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, struct kvm_cpuid *cpuid, struct kvm_cpuid_entry __user *entries) { -int r, i; +int r, i, n = 0; struct kvm_cpuid_entry *cpuid_entries; r = -E2BIG; @@ -803,8 +803,17 @@ static int kvm_vcpu_ioctl_set_cpuid(struct kvm_vcpu *vcpu, vcpu-arch.cpuid_entries[i].ebx = cpuid_entries[i].ebx; vcpu-arch.cpuid_entries[i].ecx = cpuid_entries[i].ecx; vcpu-arch.cpuid_entries[i].edx = cpuid_entries[i].edx; -vcpu-arch.cpuid_entries[i].index = 0; -vcpu-arch.cpuid_entries[i].flags = 0; +switch(vcpu-arch.cpuid_entries[i].function) { +case 4: +vcpu-arch.cpuid_entries[i].index = n; +vcpu-arch.cpuid_entries[i].flags = KVM_CPUID_FLAG_SIGNIFCANT_INDEX; +n++; +break; +default: +vcpu-arch.cpuid_entries[i].index = 0; +vcpu-arch.cpuid_entries[i].flags = 0; +break; +} I will not mention the whitespace damage here :-). Instead, I'd ask you Oh well, after having been into qemu source, I just got used to use spaces instead of tabs ;-). to review, comment, and even try, the patch that I posted here not long ago, exposing all safe host cpuid functions to guests. Sure. Basically your patch targets at a completely different use case than mine though. You want to expose the host features on the virtual CPU, whereas my goal is to have a virtual Core Duo/Solo CPU, even if your host CPU is actually an SVM capable one. So my CoreDuo CPU definition still fails to populate a proper CPUID function 4. With the -cpu host option, Linux works (as it's bright enough to know that some values are just plain wrong), but Darwin crashes. I am not exactly sure why it is, but I guess it's due to the function 4 values exposing a 2-core CPU, which kvm simply doesn't emulate. So far I still like the approach of the cpuid2 API, so it would be great if you could take the patch one step further and have qemu defined CPUs work properly as well. I do not know too much about the kvm code style, so I can't really say anything regarding that. Regards, Alex - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel
Re: [kvm-devel] mmu notifiers
Christoph Lameter wrote: On Sun, 13 Jan 2008, Avi Kivity wrote: I was just explaining how kvm shares memory among guests (which does not require mmu notifiers); if you have some other configuration that can benefit from mmu notifiers, then, well, great. I think you have two page tables pointing to the same memory location right (not to page structs but two ptes)? Without a mmu notifier the pages in this memory range cannot be evicted because otherwise ptes of the other instance will point to a page that is now used for a different purpose. Even with just one guest we can't swap well without mmu notifiers. kvm constructs new page tables for the guest that the Linux vm doesn't know about, so when Linux removes all the ptes, we need a callback to remove the kvm private ptes (and tlb entries). -- error compiling committee.c: too many arguments to function - Check out the new SourceForge.net Marketplace. It's the best place to buy or sell services for just about anything Open Source. http://ad.doubleclick.net/clk;164216239;13503038;w?http://sf.net/marketplace ___ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel