[PATCH] kvm: extboot: properly set int 0x13 return value
From: Glauber Costa [EMAIL PROTECTED] Callers of int 0x13 usually rely on the carry flag being clear/set to indicate the status of the interrupt execution. However, our current code clears or sets the flags register, which is totally useless. Whatever value it has will be overwritten by the flags value saved _before_ the interrupt, due to the iret instruction. This fixes a bug that prevents slackware (and possibly win2k, untested) from booting. Signed-off-by: Glauber Costa [EMAIL PROTECTED] Acked-by: Anthony Liguori [EMAIL PROTECTED] Signed-off-by: Avi Kivity [EMAIL PROTECTED] diff --git a/extboot/extboot.S b/extboot/extboot.S index 2630abb..e3d1adf 100644 --- a/extboot/extboot.S +++ b/extboot/extboot.S @@ -99,24 +99,24 @@ int19_handler: #define FLAGS_CF 0x01 -.macro clc - push %ax - pushf - pop %ax - and $(~FLAGS_CF), %ax - push %ax - popf - pop %ax +/* The two macro below clear/set the carry flag to indicate the status + * of the interrupt execution. It is not enough to issue a clc/stc instruction, + * since the value of the flags register will be overwritten by whatever is + * in the stack frame + */ +.macro clc_stack + push %bp + mov %sp, %bp + /* 8 = 2 (bp, just pushed) + 2 (ip) + 3 (real mode interrupt frame) */ + and $(~FLAGS_CF), 8(%bp) + pop %bp .endm -.macro stc - push %ax - pushf - pop %ax - or $(FLAGS_CF), %ax - push %ax - popf - pop %ax +.macro stc_stack + push %bp + /* 8 = 2 (bp, just pushed) + 2 (ip) + 3 (real mode interrupt frame) */ + or $(FLAGS_CF), 8(%bp) + pop %bp .endm /* we clobber %bx */ @@ -292,7 +292,7 @@ mul32: /* lo, hi, lo, hi */ disk_reset: movb $0, %ah - clc + clc_stack ret /* this really should be a function, not a macro but i'm lazy */ @@ -395,7 +395,7 @@ disk_reset: pop %ax mov $0, %ah - clc + clc_stack ret .endm @@ -454,12 +454,12 @@ read_disk_drive_parameters: pop %bx /* do this last since it's the most sensitive */ - clc + clc_stack ret alternate_disk_reset: movb $0, %ah - clc + clc_stack ret read_disk_drive_size: @@ -498,21 +498,21 @@ 
read_disk_drive_size: freea pop %bx - clc + clc_stack ret check_if_extensions_present: mov $0x30, %ah mov $0xAA55, %bx mov $0x07, %cx - clc + clc_stack ret .macro extended_read_write_sectors cmd cmpb $10, 0(%si) jg 1f mov $1, %ah - stc + stc_stack ret 1: push %ax @@ -544,7 +544,7 @@ check_if_extensions_present: pop %ax mov $0, %ah - clc + clc_stack ret .endm @@ -612,12 +612,12 @@ get_extended_drive_parameters: pop %ax mov $0, %ah - clc + clc_stack ret terminate_disk_emulation: mov $1, %ah - stc + stc_stack ret int13_handler: -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: fix handling of ACK from shared guest IRQ
If an assigned device shares a guest irq with an emulated device then we currently interpret an ack generated by the emulated device as originating from the assigned device leading to e.g. Unbalanced enable for IRQ 4347 from the enable_irq() in kvm_assigned_dev_ack_irq(). The fix is fairly simple - don't enable the physical device irq unless it was previously disabled. Of course, this can still lead to a situation where a non-assigned device ACK can cause the physical device irq to be reenabled before the device was serviced. However, being level sensitive, the interrupt will merely be regenerated. Signed-off-by: Mark McLoughlin [EMAIL PROTECTED] --- include/linux/kvm_host.h |1 + virt/kvm/kvm_main.c | 15 ++- 2 files changed, 15 insertions(+), 1 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8091a4d..eafabd5 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -307,6 +307,7 @@ struct kvm_assigned_dev_kernel { int host_busnr; int host_devfn; int host_irq; + bool host_irq_disabled; int guest_irq; struct msi_msg guest_msi; #define KVM_ASSIGNED_DEV_GUEST_INTX (1 << 0) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index e41d39d..b6cd30a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -170,6 +170,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct work_struct *work) KVM_ASSIGNED_DEV_GUEST_MSI) { assigned_device_msi_dispatch(assigned_dev); enable_irq(assigned_dev->host_irq); + assigned_dev->host_irq_disabled = false; } mutex_unlock(&assigned_dev->kvm->lock); kvm_put_kvm(assigned_dev->kvm); @@ -181,8 +182,12 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id) (struct kvm_assigned_dev_kernel *) dev_id; kvm_get_kvm(assigned_dev->kvm); + schedule_work(&assigned_dev->interrupt_work); + disable_irq_nosync(irq); + assigned_dev->host_irq_disabled = true; + return IRQ_HANDLED; } @@ -196,8 +201,16 @@ static void kvm_assigned_dev_ack_irq(struct kvm_irq_ack_notifier *kian) dev = 
container_of(kian, struct kvm_assigned_dev_kernel, ack_notifier); + kvm_set_irq(dev->kvm, dev->irq_source_id, dev->guest_irq, 0); - enable_irq(dev->host_irq); + + /* The guest irq may be shared so this ack may be +* from another device. +*/ + if (dev->host_irq_disabled) { + enable_irq(dev->host_irq); + dev->host_irq_disabled = false; + } } static void kvm_free_assigned_irq(struct kvm *kvm, -- 1.5.4.3 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Avi Kivity wrote: - something did a read-modify-write cycle on cr4 (which contains the svm enable bit) while kvm enabled that bit Well, there are a couple of code paths that do this. I'll look into it. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Avi Kivity wrote: Avi Kivity wrote: - something did a read-modify-write cycle on cr4 (which contains the svm enable bit) while kvm enabled that bit Well, there are a couple of code paths that do this. I'll look into it. Sorry, that's EFER. It could be something doing a read-modify-write on that. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Kvm: Qemu: save nvram
Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote: This patch saves the nvram. It saves the nvram when the -name argument is specified, and the saved file is named after that argument. If the argument is not specified, the nvram is not saved. I think we might be better off having an explicit command line arg for nvram path rather than hardcoding the directory, because there may well be times where you want to have nvram saved, but don't want to specify -name, and vice versa. -nvram foo.data could prepend a default directory of $localstatedir/lib/qemu/nvram, where $localstatedir is set from 'configure' script, or -nvram /some/path/foo.data would use the explicit path given. I prefer the current directory if a relative path is given. Since we encourage running qemu as an unprivileged user, and we don't want a world-writable directory, each user will have to provide a private storage location. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/12] introduce linux/iommu.h for an iommu api
This patch introduces the API to abstract the exported VT-d functions for KVM into a generic API. This way the AMD IOMMU implementation can plug into this API later. Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- include/linux/iommu.h | 109 + 1 files changed, 109 insertions(+), 0 deletions(-) create mode 100644 include/linux/iommu.h diff --git a/include/linux/iommu.h b/include/linux/iommu.h new file mode 100644 index 000..47e9ec8 --- /dev/null +++ b/include/linux/iommu.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#ifndef __LINUX_IOMMU_H +#define __LINUX_IOMMU_H + +struct device; + +struct iommu_domain { + void *priv; +}; + +struct iommu_ops { + int (*domain_init)(struct iommu_domain *domain); + void (*domain_destroy)(struct iommu_domain *domain); + int (*attach_dev)(struct iommu_domain *domain, struct device *dev); + void (*detach_dev)(struct iommu_domain *domain, struct device *dev); + int (*map)(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); + void (*unmap)(struct iommu_domain *domain, unsigned long iova, + size_t size); + phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, + unsigned long iova); +}; + +#ifdef CONFIG_IOMMU_API + +extern void register_iommu(struct iommu_ops *ops); +extern bool iommu_found(void); +extern struct iommu_domain *iommu_domain_alloc(void); +extern void iommu_domain_free(struct iommu_domain *domain); +extern int iommu_attach_device(struct iommu_domain *domain, + struct device *dev); +extern void iommu_detach_device(struct iommu_domain *domain, + struct device *dev); +extern int iommu_map_range(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot); +extern void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, + size_t size); +extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova); + +#else /* CONFIG_IOMMU_API */ + +static inline void register_iommu(struct iommu_ops *ops) +{ +} + +static inline bool iommu_found(void) +{ + return false; +} + +static inline struct iommu_domain *iommu_domain_alloc(void) +{ + return NULL; +} + +static inline void iommu_domain_free(struct iommu_domain *domain) +{ +} + +static inline int iommu_attach_device(struct iommu_domain *domain, + struct device *dev) +{ + 
return -ENODEV; +} + +static inline void iommu_detach_device(struct iommu_domain *domain, + struct device *dev) +{ +} + +static inline int iommu_map_range(struct iommu_domain *domain, + unsigned long iova, phys_addr_t paddr, + size_t size, int prot) +{ + return -ENODEV; +} + +static inline void iommu_unmap_range(struct iommu_domain *domain, +unsigned long iova, size_t size) +{ +} + +static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, +unsigned long iova) +{ + return 0; +} + +#endif /* CONFIG_IOMMU_API */ + +#endif /* __LINUX_IOMMU_H */ -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/12] add frontend implementation for the IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/base/iommu.c | 100 ++ 1 files changed, 100 insertions(+), 0 deletions(-) create mode 100644 drivers/base/iommu.c diff --git a/drivers/base/iommu.c b/drivers/base/iommu.c new file mode 100644 index 000..5e039d4 --- /dev/null +++ b/drivers/base/iommu.c @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2007-2008 Advanced Micro Devices, Inc. + * Author: Joerg Roedel [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/bug.h> +#include <linux/types.h> +#include <linux/errno.h> +#include <linux/iommu.h> + +static struct iommu_ops *iommu_ops; + +void register_iommu(struct iommu_ops *ops) +{ + if (iommu_ops) + BUG(); + + iommu_ops = ops; +} + +bool iommu_found() +{ + return iommu_ops != NULL; +} +EXPORT_SYMBOL_GPL(iommu_found); + +struct iommu_domain *iommu_domain_alloc(void) +{ + struct iommu_domain *domain; + int ret; + + domain = kmalloc(sizeof(*domain), GFP_KERNEL); + if (!domain) + return NULL; + + ret = iommu_ops->domain_init(domain); + if (ret) + goto out_free; + + return domain; + +out_free: + kfree(domain); + + return NULL; +} +EXPORT_SYMBOL_GPL(iommu_domain_alloc); + +void iommu_domain_free(struct iommu_domain *domain) +{ + iommu_ops->domain_destroy(domain); + kfree(domain); +} +EXPORT_SYMBOL_GPL(iommu_domain_free); + +int iommu_attach_device(struct iommu_domain *domain, struct device 
*dev) +{ + return iommu_ops->attach_dev(domain, dev); +} +EXPORT_SYMBOL_GPL(iommu_attach_device); + +void iommu_detach_device(struct iommu_domain *domain, struct device *dev) +{ + iommu_ops->detach_dev(domain, dev); +} +EXPORT_SYMBOL_GPL(iommu_detach_device); + +int iommu_map_range(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot) +{ + return iommu_ops->map(domain, iova, paddr, size, prot); +} +EXPORT_SYMBOL_GPL(iommu_map_range); + +void iommu_unmap_range(struct iommu_domain *domain, unsigned long iova, + size_t size) +{ + iommu_ops->unmap(domain, iova, size); +} +EXPORT_SYMBOL_GPL(iommu_unmap_range); + +phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova) +{ + return iommu_ops->iova_to_phys(domain, iova); +} +EXPORT_SYMBOL_GPL(iommu_iova_to_phys); -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/12] KVM: rename vtd.c to iommu.c
Impact: file renamed The code in the vtd.c file can be reused for other IOMMUs as well. So rename it to make it clear that it handles more than VT-d. Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- arch/ia64/kvm/Makefile |2 +- arch/x86/kvm/Makefile |2 +- virt/kvm/{vtd.c => iommu.c} |0 3 files changed, 2 insertions(+), 2 deletions(-) rename virt/kvm/{vtd.c => iommu.c} (100%) diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index 76464dc..cb69dfc 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -52,7 +52,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif kvm-objs := $(common-objs) kvm-ia64.o kvm_fw.o diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index c023435..00f46c2 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -8,7 +8,7 @@ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif ifeq ($(CONFIG_DMAR),y) -common-objs += $(addprefix ../../../virt/kvm/, vtd.o) +common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif EXTRA_CFLAGS += -Ivirt/kvm -Iarch/x86/kvm diff --git a/virt/kvm/vtd.c b/virt/kvm/iommu.c similarity index 100% rename from virt/kvm/vtd.c rename to virt/kvm/iommu.c -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Kvm: Qemu: save nvram
On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote: Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote: This patch saves the nvram. It saves the nvram when the -name argument is specified, and the saved file is named after that argument. If the argument is not specified, the nvram is not saved. I think we might be better off having an explicit command line arg for nvram path rather than hardcoding the directory, because there may well be times where you want to have nvram saved, but don't want to specify -name, and vice versa. -nvram foo.data could prepend a default directory of $localstatedir/lib/qemu/nvram, where $localstatedir is set from 'configure' script, or -nvram /some/path/foo.data would use the explicit path given. I prefer the current directory if a relative path is given. Since we encourage running qemu as an unprivileged user, and we don't want a world-writable directory, each user will have to provide a private storage location. Fine by me - avoids needing to embed any path in QEMU code at all then Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76
Bugs item #2138166, was opened at 2008-09-30 15:39 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2138166group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: qemu Group: None Status: Open Resolution: Fixed Priority: 5 Private: No Submitted By: John Rousseau (johnrrousseau) Assigned to: Nobody/Anonymous (nobody) Summary: Vista guest fails to start on kvm-76 Initial Comment: CPU: Intel(R) Core(TM)2 Duo CPU T7250 @ 2.00GHz Build: kvm-76 Host kernel: 2.6.26.3-29.fc9.x86_64 Host arch: x86_64 Guest: Windows Vista Ultimate 64-bit QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga -full-screen -smp 2 I've been running this guest on this host with kvm-75 without difficulty. kvm-76, built the same way that kvm-75 was (and on the same machine), fails to start my guest. The guest window is up, but the guest fails to complete startup. Command line output is: kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists set_vram_mapping failed kvm: get_dirty_pages returned -2 The last line repeats hundreds of times. -- Comment By: Technologov (technologov) Date: 2008-12-02 15:39 Message: Please close bug. -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-12 14:50 Message: I've confirmed that this issue is resolved with kvm-77. -- Comment By: Marco Menardi (markit) Date: 2008-10-10 14:02 Message: I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550, kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead (and I've had to revert to 75, of course). Any news? Would love to have forecoming kvm77 with this blocking bug fixed. 
-- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-03 03:06 Message: kvm-2646c5.tar.gz: Worked fine kvm-d558461.tar.gz: Failed (showed this bug) I've never used git before, but if you teach me to fish... I installed git, pulled the userspace and kernel trees, built kvm-75 and kvm-76 and got the expected results, but when I did a bisect on kvm-75 (good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build. configure among other things was missing. What am I doing wrong? Also, what should I be syncing my kernel tree to when I am bisecting the userspace tree? Thanks. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 19:27 Message: Are you using git? If so, can you bisect to find out who the culprit is? If not, I've managed to archive two strategic commits you should try: http://glommer.net/kvm-2646c5.tar.gz and http://glommer.net/kvm-d558461.tar.gz please report success or failure with them thanks! -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-02 18:48 Message: I applied the patch to kvm-76 and ran into basically the same problem. The guest still hung during boot and I got the plume of kvm: get_dirty_pages returned -2 errors, but the first message kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists wasn't displayed. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 16:01 Message: can you please test the patch at http://glommer.net/band-aid.patch ? -- Comment By: Brian Jackson (iggy_cav) Date: 2008-09-30 17:06 Message: This was reported on the mailing list. It's a problem with sdl output. Not specific to any guest. Until the problem is fixed, I'd suggest using vnc output. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2138166group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76
Bugs item #2138166, was opened at 2008-09-30 08:39 Message generated for change (Settings changed) made by johnrrousseau You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2138166group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: qemu Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: John Rousseau (johnrrousseau) Assigned to: Nobody/Anonymous (nobody) Summary: Vista guest fails to start on kvm-76 Initial Comment: CPU: Intel(R) Core(TM)2 Duo CPU T7250 @ 2.00GHz Build: kvm-76 Host kernel: 2.6.26.3-29.fc9.x86_64 Host arch: x86_64 Guest: Windows Vista Ultimate 64-bit QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga -full-screen -smp 2 I've been running this guest on this host with kvm-75 without difficulty. kvm-76, built the same way that kvm-75 was (and on the same machine), fails to start my guest. The guest window is up, but the guest fails to complete startup. Command line output is: kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists set_vram_mapping failed kvm: get_dirty_pages returned -2 The last line repeats hundreds of times. -- Comment By: Technologov (technologov) Date: 2008-12-02 08:39 Message: Please close bug. -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-12 08:50 Message: I've confirmed that this issue is resolved with kvm-77. -- Comment By: Marco Menardi (markit) Date: 2008-10-10 08:02 Message: I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550, kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead (and I've had to revert to 75, of course). Any news? Would love to have forecoming kvm77 with this blocking bug fixed. 
-- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-02 20:06 Message: kvm-2646c5.tar.gz: Worked fine kvm-d558461.tar.gz: Failed (showed this bug) I've never used git before, but if you teach me to fish... I installed git, pulled the userspace and kernel trees, built kvm-75 and kvm-76 and got the expected results, but when I did a bisect on kvm-75 (good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build. configure among other things was missing. What am I doing wrong? Also, what should I be syncing my kernel tree to when I am bisecting the userspace tree? Thanks. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 12:27 Message: Are you using git? If so, can you bisect to find out who the culprit is? If not, I've managed to archive two strategic commits you should try: http://glommer.net/kvm-2646c5.tar.gz and http://glommer.net/kvm-d558461.tar.gz please report success or failure with them thanks! -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-02 11:48 Message: I applied the patch to kvm-76 and ran into basically the same problem. The guest still hung during boot and I got the plume of kvm: get_dirty_pages returned -2 errors, but the first message kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists wasn't displayed. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 09:01 Message: can you please test the patch at http://glommer.net/band-aid.patch ? -- Comment By: Brian Jackson (iggy_cav) Date: 2008-09-30 10:06 Message: This was reported on the mailing list. It's a problem with sdl output. Not specific to any guest. Until the problem is fixed, I'd suggest using vnc output. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2138166group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2353811 ] Solaris 10 guest unstable
Bugs item #2353811, was opened at 2008-11-27 17:44 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353811&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: krwi (krwi) Assigned to: Nobody/Anonymous (nobody) Summary: Solaris 10 guest unstable Initial Comment: After several minutes of normal work the Solaris guest hangs for a few seconds. Sometimes the system hangs completely and the kvm process must be killed. Besides this, host CPU utilization is much higher than could be expected from guest usage. Host: 2x Quad Core Opteron 8356 64GB RAM OS: Gentoo 64bit kernel: 2.6.26 KVM-79 (modules from package not from kernel) Guest: Solaris 10u5 started with command: kvm -M pc -m 4096 -smp 2 -name despina -monitor pty -boot c -drive file=/dev/MDvg_SystemVM/despina,if=ide,index=0,boot=on -drive file=/dev/MDvg_DataVM/despina30,if=ide,index=1 -net nic,macaddr=00:16:3e:5f:64:10,vlan=0,model=e1000 -net tap,fd=38,script=,vlan=0,ifname=vnet8 -serial pty -parallel none Using the -no-acpi switch doesn't help. On the same host I am running several other guest systems including Win2003 Server sp2 32 and 64bit, WinXP sp2 32bit, Gentoo 64bit and Debian Lenny 32bit without problems like this. -- Comment By: Technologov (technologov) Date: 2008-12-02 15:46 Message: From my testing, yes, a 64-bit Solaris guest is problematic and requires patching for KVM to work, but a 32-bit Solaris guest should work fine. Can you tell us which Solaris you use? 
(32 or 64-bit) -Alexey -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353811&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2088475 ] OpenSuse10.2 can not be installed
Bugs item #2088475, was opened at 2008-09-02 11:37 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Jiajun Xu (jiajun) Assigned to: Nobody/Anonymous (nobody) Summary: OpenSuse10.2 can not be installed Initial Comment: OpenSuse10.2 can not be installed on KVM. The installer stops after loading ISOLinux. This is against the latest kvm commit, kvm.git :5b9207ec01681337786c7898ffc0165ec4e7c2e4 userspace.git :5f2a9719f105e29fbde4529cf919a5351b05da9a. -- Comment By: Technologov (technologov) Date: 2008-12-02 15:58 Message: It crashed with old KVMs, but with newer ones it just gets stuck. It doesn't matter. And yes, openSUSE 11.0 has been tested to work. -- Comment By: Jiajun Xu (jiajun) Date: 2008-10-16 17:23 Message: From the bug description, opensuse11.0 should work? And we did not see a guest crash during installation; the guest hangs when loading grub and no error messages are printed. -- Comment By: Technologov (technologov) Date: 2008-10-16 17:04 Message: Known issue: https://sourceforge.net/tracker/index.php?func=detail&aid=1760424&group_id=180599&atid=893831 This bug is a duplicate. -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[GIT PULL][RESEND] KVM updates for Linux 2.6.28-rc6
Linus, please pull some kvm fixes from repo and branch at: git://git.kernel.org/pub/scm/linux/kernel/git/avi/kvm.git kvm-updates/2.6.28 There are a couple of fixes for the out-of-sync mmu, a fix for a lost irq while injecting an nmi (which causes guests with an nmi watchdog to hang), as well as fixes for the ppc, s390, and ia64 kvm ports. Avi Kivity (1): KVM: VMX: Fix interrupt loss during race with NMI Christian Borntraeger (1): KVM: s390: Fix problem state handling in guest sigp handler Hollis Blanchard (1): KVM: ppc: stop leaking host memory on VM exit Marcelo Tosatti (2): KVM: MMU: fix sync of ptes addressed at owner pagetable KVM: MMU: avoid creation of unreachable pages in the shadow Xiantao Zhang (2): KVM: ia64: Fix incorrect kbuild CFLAGS override KVM: ia64: Fix: Use correct calling convention for PAL_VPS_RESUME_HANDLER arch/ia64/kvm/Makefile |2 +- arch/ia64/kvm/optvfault.S | 11 +++ arch/powerpc/include/asm/kvm_ppc.h |2 ++ arch/powerpc/kvm/44x_tlb.c |8 arch/powerpc/kvm/powerpc.c |1 + arch/s390/kvm/sigp.c |5 + arch/x86/kvm/mmu.c |2 +- arch/x86/kvm/paging_tmpl.h |1 + arch/x86/kvm/vmx.c |4 +++- 9 files changed, 29 insertions(+), 7 deletions(-) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2138166 ] Vista guest fails to start on kvm-76
Bugs item #2138166, was opened at 2008-09-30 15:39 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detailatid=893831aid=2138166group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: qemu Group: None Status: Closed Resolution: Fixed Priority: 5 Private: No Submitted By: John Rousseau (johnrrousseau) Assigned to: Nobody/Anonymous (nobody) Summary: Vista guest fails to start on kvm-76 Initial Comment: CPU: Intel(R) Core(TM)2 Duo CPU T7250 @ 2.00GHz Build: kvm-76 Host kernel: 2.6.26.3-29.fc9.x86_64 Host arch: x86_64 Guest: Windows Vista Ultimate 64-bit QEMU command: qemu-system-x86_64 -hda /home/jrr/vista-x86_64.img -m 2048M -net nic,vlan=0,macaddr=52:54:00:12:32:00 -net tap,vlan=0,ifname=tap0 -std-vga -full-screen -smp 2 I've been running this guest on this host with kvm-75 without difficulty. kvm-76, built the same way that kvm-75 was (and on the same machine), fails to start my guest. The guest window is up, but the guest fails to complete startup. Command line output is: kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists set_vram_mapping failed kvm: get_dirty_pages returned -2 The last line repeats hundreds of times. -- Comment By: Technologov (technologov) Date: 2008-12-02 16:02 Message: Please close bug. -- Comment By: Technologov (technologov) Date: 2008-12-02 15:39 Message: Please close bug. -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-12 14:50 Message: I've confirmed that this issue is resolved with kvm-77. -- Comment By: Marco Menardi (markit) Date: 2008-10-10 14:02 Message: I've the same issue with my XP-32 guests, I've Debian64 sid, Phenom 9550, kernel 2.6.26-1-amd64. Everything works like a charm with kvm-75 instead (and I've had to revert to 75, of course). Any news? 
Would love to have the forthcoming kvm77 with this blocking bug fixed. -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-03 03:06 Message: kvm-2646c5.tar.gz: Worked fine kvm-d558461.tar.gz: Failed (showed this bug) I've never used git before, but if you teach me to fish... I installed git, pulled the userspace and kernel trees, built kvm-75 and kvm-76 and got the expected results, but when I did a bisect on kvm-75 (good) and kvm-76 (bad) I kept getting sparse trees that I couldn't build. configure among other things was missing. What am I doing wrong? Also, what should I be syncing my kernel tree to when I am bisecting the userspace tree? Thanks. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 19:27 Message: Are you using git? If so, can you bisect to find out who the culprit is? If not, I've managed to archive two strategic commits you should try: http://glommer.net/kvm-2646c5.tar.gz and http://glommer.net/kvm-d558461.tar.gz please report success or failure with them thanks! -- Comment By: John Rousseau (johnrrousseau) Date: 2008-10-02 18:48 Message: I applied the patch to kvm-76 and ran into basically the same problem. The guest still hung during boot and I got the plume of kvm: get_dirty_pages returned -2 errors, but the first message kvm_create_phys_mem: File existsset_vram_mapping: cannot allocate memory: File exists wasn't displayed. -- Comment By: Glauber de Oliveira Costa (glommer) Date: 2008-10-02 16:01 Message: can you please test the patch at http://glommer.net/band-aid.patch ? -- Comment By: Brian Jackson (iggy_cav) Date: 2008-09-30 17:06 Message: This was reported on the mailing list. It's a problem with sdl output. Not specific to any guest. Until the problem is fixed, I'd suggest using vnc output. 
-- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2138166&group_id=180599 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/12] VT-d: Support multiple device assignment for KVM
From: Weidong Han [EMAIL PROTECTED] In order to support multiple device assignment for KVM, this patch does following main changes: - extend dmar_domain to own multiple devices from different iommus, use a bitmap of iommus to replace iommu pointer in dmar_domain. - implement independent low level functions for kvm, then won't impact native VT-d. - SAGAW capability may be different across iommus, that's to say the VT-d page table levels may be different among iommus. This patch uses a defaut agaw, and skip top levels of page tables for iommus which have smaller agaw than default. - rename the APIs for kvm VT-d, make it more readable. [Joerg: coding style cleanups] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/pci/dmar.c| 15 +- drivers/pci/intel-iommu.c | 696 ++-- include/linux/dma_remapping.h | 21 +- include/linux/dmar.h |2 + include/linux/intel-iommu.h | 21 +- 5 files changed, 636 insertions(+), 119 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 691b3ad..d54d3db 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -484,13 +484,14 @@ void __init detect_intel_iommu(void) dmar_tbl = NULL; } - int alloc_iommu(struct dmar_drhd_unit *drhd) { struct intel_iommu *iommu; int map_size; u32 ver; static int iommu_allocated = 0; + unsigned long sagaw; + int agaw; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -506,6 +507,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu-cap = dmar_readq(iommu-reg + DMAR_CAP_REG); iommu-ecap = dmar_readq(iommu-reg + DMAR_ECAP_REG); + /* set agaw, SAGAW may be different across iommus */ + sagaw = cap_sagaw(iommu-cap); + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); +agaw = 0; agaw--) + if (test_bit(agaw, sagaw)) + break; + if (agaw 0) { + printk(KERN_ERR IOMMU: unsupported sagaw %lx\n, sagaw); + goto error; + } + iommu-agaw = agaw; + /* the registers might be more than one page */ map_size = max_t(int, 
ecap_max_iotlb_offset(iommu-ecap), cap_max_fault_reg_offset(iommu-cap)); diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5c8baa4..7f12852 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -50,8 +50,6 @@ #define IOAPIC_RANGE_END (0xfeef) #define IOVA_START_ADDR(0x1000) -#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 - #define DOMAIN_MAX_ADDR(gaw) u64)1) gaw) - 1) @@ -64,6 +62,7 @@ struct deferred_flush_tables { int next; struct iova *iova[HIGH_WATER_MARK]; struct dmar_domain *domain[HIGH_WATER_MARK]; + struct intel_iommu *iommu; }; static struct deferred_flush_tables *deferred_flush; @@ -184,6 +183,68 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +/* in native case, each domain is related to only one iommu */ +static struct intel_iommu *domain_get_only_iommu(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp)) + return drhd-iommu; + } + + return NULL; +} + +static void domain_flush_cache(struct dmar_domain *domain, + void *addr, int size) +{ + struct intel_iommu *iommu; + + if (domain-flags DOMAIN_FLAG_VIRTUAL_MACHINE) { + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + iommu = drhd-iommu; + + if (!test_bit(iommu-seq_id, domain-iommu_bmp)) + continue; + + if (!ecap_coherent(iommu-ecap)) + clflush_cache_range(addr, size); + } + } else { + iommu = domain_get_only_iommu(domain); + if (iommu !ecap_coherent(iommu-ecap)) + clflush_cache_range(addr, size); + } +} + +static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn) +{ + struct dmar_drhd_unit *drhd = NULL; + int i; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + + for (i = 0; i drhd-devices_cnt; i++) + if (drhd-devices[i]-bus-number == bus + drhd-devices[i]-devfn == devfn) + return drhd-iommu; + + if (drhd-include_all) + return drhd-iommu; +
Re: [PATCH] extboot: properly set int 0x13 return value
Glauber Costa wrote: Callers of int 0x13 usually rely on the carry flag being clear/set to indicate the status of the interrupt execution. However, our current code clear or set the flags register, which is totally useless. Whichever value it has, will be overwritten by the flags value _before_ the interrupt, due to the iret instruction. This fixes a bug that prevents slackware (and possibly win2k, untested) to boot. Applied, thanks. -.macro clc - push %ax - pushf - pop %ax - and $(~FLAGS_CF), %ax - push %ax - popf - pop %ax Anthony, any reason you did not use the 'clc' instruction instead of a macro? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] extboot: properly set int 0x13 return value
Avi Kivity wrote: Applied, thanks. -.macro clc -push %ax -pushf -pop %ax -and $(~FLAGS_CF), %ax -push %ax -popf -pop %ax Anthony, any reason you did not use the 'clc' instruction instead of a macro? Propensity for pain? I have no idea. I assume I had a reason at the time. Probably debug related. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/13] [v3] Support VT-d multiple device assignment for KVM
This patchset supports VT-d multiple device assignment for KVM. Main changes from V2 to V3: - change the intel iommu APIs to be consistent with Joerg's generic iommu APIs. - split into a series of smaller patches for easier reviewing. Regards, Weidong -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/13] move page table handling utility functions
move page table handling utility functions from intel-iommu.c to dma_remapping.h, because some of them will be used in other .c files. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 45 include/linux/dma_remapping.h | 46 - 2 files changed, 45 insertions(+), 46 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 39c5e9d..a18e0b4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -288,51 +288,6 @@ out: spin_unlock_irqrestore(iommu-lock, flags); } -/* page table handling */ -#define LEVEL_STRIDE (9) -#define LEVEL_MASK (((u64)1 LEVEL_STRIDE) - 1) - -static inline int agaw_to_level(int agaw) -{ - return agaw + 2; -} - -static inline int agaw_to_width(int agaw) -{ - return 30 + agaw * LEVEL_STRIDE; - -} - -static inline int width_to_agaw(int width) -{ - return (width - 30) / LEVEL_STRIDE; -} - -static inline unsigned int level_to_offset_bits(int level) -{ - return (12 + (level - 1) * LEVEL_STRIDE); -} - -static inline int address_level_offset(u64 addr, int level) -{ - return ((addr level_to_offset_bits(level)) LEVEL_MASK); -} - -static inline u64 level_mask(int level) -{ - return ((u64)-1 level_to_offset_bits(level)); -} - -static inline u64 level_size(int level) -{ - return ((u64)1 level_to_offset_bits(level)); -} - -static inline u64 align_to_level(u64 addr, int level) -{ - return ((addr + level_size(level) - 1) level_mask(level)); -} - static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) { int addr_width = agaw_to_width(domain-agaw); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 66f7887..eeb8243 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -13,6 +13,50 @@ #define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK) #define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK) +/* page table handling */ +#define VTD_LEVEL_STRIDE (9) +#define VTD_LEVEL_MASK (((u64)1 VTD_LEVEL_STRIDE) - 1) + +static inline int 
agaw_to_level(int agaw) +{ + return agaw + 2; +} + +static inline int agaw_to_width(int agaw) +{ + return 30 + agaw * VTD_LEVEL_STRIDE; +} + +static inline int width_to_agaw(int width) +{ + return (width - 30) / VTD_LEVEL_STRIDE; +} + +static inline unsigned int level_to_offset_bits(int level) +{ + return 12 + (level - 1) * VTD_LEVEL_STRIDE; +} + +static inline int address_level_offset(u64 addr, int level) +{ + return (addr level_to_offset_bits(level)) VTD_LEVEL_MASK; +} + +static inline u64 level_mask(int level) +{ + return (u64)-1 level_to_offset_bits(level); +} + +static inline u64 level_size(int level) +{ + return (u64)1 level_to_offset_bits(level); +} + +static inline u64 align_to_level(u64 addr, int level) +{ + return (addr + level_size(level) - 1) level_mask(level); +} + /* * 0: Present @@ -27,7 +71,7 @@ struct root_entry { #define ROOT_ENTRY_NR (VTD_PAGE_SIZE/sizeof(struct root_entry)) static inline bool root_present(struct root_entry *root) { - return (root-val 1); + return root-val 1; } static inline void set_root_present(struct root_entry *root) { -- 1.5.1 0002-move-page-table-handling-utility-functions.patch Description: 0002-move-page-table-handling-utility-functions.patch
[PATCH 01/13] iommu bitmap instead of iommu pointer in dmar_domain
Support dmar_domain own multiple devices from different iommus, which are set in iommu bitmap. add function domain_get_iommu() to get the only one iommu of domain in native VT-d usage. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 102 include/linux/dma_remapping.h |2 +- 2 files changed, 72 insertions(+), 32 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 5c8baa4..39c5e9d 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -64,6 +64,7 @@ struct deferred_flush_tables { int next; struct iova *iova[HIGH_WATER_MARK]; struct dmar_domain *domain[HIGH_WATER_MARK]; + struct intel_iommu *iommu; }; static struct deferred_flush_tables *deferred_flush; @@ -184,6 +185,21 @@ void free_iova_mem(struct iova *iova) kmem_cache_free(iommu_iova_cache, iova); } +/* in native case, each domain is related to only one iommu */ +static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp)) + return drhd-iommu; + } + + return NULL; +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -324,6 +340,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) int level = agaw_to_level(domain-agaw); int offset; unsigned long flags; + struct intel_iommu *iommu = domain_get_iommu(domain); BUG_ON(!domain-pgd); @@ -347,7 +364,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) flags); return NULL; } - __iommu_flush_cache(domain-iommu, tmp_page, + __iommu_flush_cache(iommu, tmp_page, PAGE_SIZE); dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); /* @@ -356,7 +373,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) */ dma_set_pte_readable(*pte); dma_set_pte_writable(*pte); 
- __iommu_flush_cache(domain-iommu, pte, sizeof(*pte)); + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } parent = phys_to_virt(dma_pte_addr(*pte)); level--; @@ -393,13 +410,14 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) { struct dma_pte *pte = NULL; + struct intel_iommu *iommu = domain_get_iommu(domain); /* get last level pte */ pte = dma_addr_level_pte(domain, addr, 1); if (pte) { dma_clear_pte(*pte); - __iommu_flush_cache(domain-iommu, pte, sizeof(*pte)); + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } } @@ -428,6 +446,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int addr_width = agaw_to_width(domain-agaw); struct dma_pte *pte; int total = agaw_to_level(domain-agaw); + struct intel_iommu *iommu = domain_get_iommu(domain); int level; u64 tmp; @@ -447,7 +466,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, free_pgtable_page( phys_to_virt(dma_pte_addr(*pte))); dma_clear_pte(*pte); - __iommu_flush_cache(domain-iommu, + __iommu_flush_cache(iommu, pte, sizeof(*pte)); } tmp += level_size(level); @@ -1006,7 +1025,8 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) set_bit(num, iommu-domain_ids); domain-id = num; - domain-iommu = iommu; + memset(domain-iommu_bmp, 0, sizeof(unsigned long)); + set_bit(iommu-seq_id, domain-iommu_bmp); iommu-domains[num] = domain; spin_unlock_irqrestore(iommu-lock, flags); @@ -1016,10 +1036,12 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) static void iommu_free_domain(struct dmar_domain *domain) { unsigned long flags; + struct intel_iommu *iommu; - spin_lock_irqsave(domain-iommu-lock, flags); - clear_bit(domain-id, domain-iommu-domain_ids); -
[PATCH 03/13] set iommu agaw
agaw may be different across iommus. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/dmar.c| 14 ++ include/linux/dma_remapping.h |2 ++ include/linux/intel-iommu.h |1 + 3 files changed, 17 insertions(+), 0 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 691b3ad..ebcc7c2 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -491,6 +491,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int map_size; u32 ver; static int iommu_allocated = 0; + unsigned long sagaw; + int agaw; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -506,6 +508,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu-cap = dmar_readq(iommu-reg + DMAR_CAP_REG); iommu-ecap = dmar_readq(iommu-reg + DMAR_ECAP_REG); + /* set agaw, SAGAW may be different across iommus */ + sagaw = cap_sagaw(iommu-cap); + for (agaw = width_to_agaw(DEFAULT_DOMAIN_ADDRESS_WIDTH); +agaw = 0; agaw--) + if (test_bit(agaw, sagaw)) + break; + if (agaw 0) { + printk(KERN_ERR IOMMU: unsupported sagaw %lx\n, sagaw); + goto error; + } + iommu-agaw = agaw; + /* the registers might be more than one page */ map_size = max_t(int, ecap_max_iotlb_offset(iommu-ecap), cap_max_fault_reg_offset(iommu-cap)); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index eeb8243..c9d99c9 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -1,6 +1,8 @@ #ifndef _DMA_REMAPPING_H #define _DMA_REMAPPING_H +#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 + /* * VT-d hardware uses 4KiB page size regardless of host page size. 
*/ diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 3d017cf..24a2945 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -299,6 +299,7 @@ struct intel_iommu { struct dmar_domain **domains; /* ptr to domains */ spinlock_t lock; /* protect context, domain ids */ struct root_entry *root_entry; /* virtual address */ + int agaw; /* agaw of this iommu */ unsigned int irq; unsigned char name[7];/* Device Name */ -- 1.5.1 0003-set-iommu-agaw.patch Description: 0003-set-iommu-agaw.patch
[PATCH 04/13] iommu coherency
in dmar_domain, more than one iommus may be included in iommu_bmp. Due to Coherency capability may be different across iommus, set this variable to indicate iommu access is coherent or not. Only when all related iommus in a dmar_domain are all coherent, iommu access of this domain is coherent. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c |6 ++ include/linux/dma_remapping.h |2 ++ 2 files changed, 8 insertions(+), 0 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index a18e0b4..fa1507b 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -982,6 +982,12 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) domain-id = num; memset(domain-iommu_bmp, 0, sizeof(unsigned long)); set_bit(iommu-seq_id, domain-iommu_bmp); + + if (ecap_coherent(iommu-ecap)) + domain-iommu_coherency = 1; + else + domain-iommu_coherency = 0; + iommu-domains[num] = domain; spin_unlock_irqrestore(iommu-lock, flags); diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index c9d99c9..add2111 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -175,6 +175,8 @@ struct dmar_domain { #define DOMAIN_FLAG_MULTIPLE_DEVICES 1 int flags; + + int iommu_coherency;/* iommu access is coherent or not */ }; /* PCI domain-device relationship */ -- 1.5.1 0004-iommu-coherency.patch Description: 0004-iommu-coherency.patch
[PATCH 05/13] add domain flag DOMAIN_FLAG_VIRTUAL_MACHINE
By default, one domain owns one device, like native VT-d usage. For kvm VT-d usage, more than one devices across iommus may be assigned to one domain, flag DOMAIN_FLAG_VIRTUAL_MACHINE is for this usage. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c |3 ++- include/linux/dma_remapping.h | 11 ++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index fa1507b..09a5150 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -989,6 +989,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) domain-iommu_coherency = 0; iommu-domains[num] = domain; + domain-flags = 0; spin_unlock_irqrestore(iommu-lock, flags); return domain; @@ -1387,7 +1388,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) info-dev = NULL; info-domain = domain; /* This domain is shared by devices under p2p bridge */ - domain-flags |= DOMAIN_FLAG_MULTIPLE_DEVICES; + domain-flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; /* pcie-to-pci bridge already has a domain, uses it */ found = NULL; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index add2111..9e39c99 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -159,6 +159,16 @@ struct dma_pte { struct intel_iommu; +/* domain flags, one domain owns one device by default */ + +/* devices under the same p2p bridge are owned in one domain */ +#define DOMAIN_FLAG_P2P_MULTIPLE_DEVICES (1 0) + +/* domain represents a virtual machine, more than one devices + * across iommus may be owned in one domain, e.g. kvm guest. 
+ */ +#define DOMAIN_FLAG_VIRTUAL_MACHINE(1 1) + struct dmar_domain { int id; /* domain id */ unsigned long iommu_bmp;/* bitmap of iommus this domain uses*/ @@ -173,7 +183,6 @@ struct dmar_domain { /* adjusted guest address width, 0 is level 2 30-bit */ int agaw; -#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 int flags; int iommu_coherency;/* iommu access is coherent or not */ -- 1.5.1 0005-add-domain-flag-DOMAIN_FLAG_VIRTUAL_MACHINE.patch Description: 0005-add-domain-flag-DOMAIN_FLAG_VIRTUAL_MACHINE.patch
[PATCH 06/13] add/remove domain device info for virtual machine VT-d
Separate add/remove domain device info functions for virtual machine VT-d from natvie VT-d. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 164 +++- include/linux/dma_remapping.h |1 + 2 files changed, 160 insertions(+), 5 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 09a5150..429aff4 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -200,6 +200,27 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) return NULL; } +static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn) +{ + struct dmar_drhd_unit *drhd = NULL; + int i; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + + for (i = 0; i drhd-devices_cnt; i++) + if (drhd-devices[i]-bus-number == bus + drhd-devices[i]-devfn == devfn) + return drhd-iommu; + + if (drhd-include_all) + return drhd-iommu; + } + + return NULL; +} + /* Gets context entry for a given bus and devfn */ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -934,7 +955,8 @@ void free_dmar_iommu(struct intel_iommu *iommu) for (; i cap_ndoms(iommu-cap); ) { domain = iommu-domains[i]; clear_bit(i, iommu-domain_ids); - domain_exit(domain); + if (--domain-iommu_count == 0) + domain_exit(domain); i = find_next_bit(iommu-domain_ids, cap_ndoms(iommu-cap), i+1); } @@ -990,6 +1012,7 @@ static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) iommu-domains[num] = domain; domain-flags = 0; + domain-iommu_count = 1; spin_unlock_irqrestore(iommu-lock, flags); return domain; @@ -1269,9 +1292,12 @@ domain_page_mapping(struct dmar_domain *domain, dma_addr_t iova, return 0; } -static void detach_domain_for_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +static void iommu_detach_dev(u8 bus, u8 devfn) { - struct intel_iommu *iommu = domain_get_iommu(domain); + struct intel_iommu *iommu = device_find_matched_iommu(bus, devfn); + + if (!iommu) + return; 
clear_context_table(iommu, bus, devfn); iommu-flush.flush_context(iommu, 0, 0, 0, @@ -1295,7 +1321,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info-dev-dev.archdata.iommu = NULL; spin_unlock_irqrestore(device_domain_lock, flags); - detach_domain_for_dev(info-domain, info-bus, info-devfn); + iommu_detach_dev(info-bus, info-devfn); free_devinfo_mem(info); spin_lock_irqsave(device_domain_lock, flags); @@ -2330,6 +2356,134 @@ int __init intel_iommu_init(void) return 0; } +/* Coherency capability may be different across iommus */ +static void domain_update_iommu_coherency(struct dmar_domain *domain) +{ + struct dmar_drhd_unit *drhd; + + domain-iommu_coherency = 1; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + if (test_bit(drhd-iommu-seq_id, domain-iommu_bmp)) { + if (!ecap_coherent(drhd-iommu-ecap)) { + domain-iommu_coherency = 0; + break; + } + } + } +} + +static int vm_domain_add_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return -ENOMEM; + + info-bus = pdev-bus-number; + info-devfn = pdev-devfn; + info-dev = pdev; + info-domain = domain; + + spin_lock_irqsave(device_domain_lock, flags); + list_add(info-link, domain-devices); + list_add(info-global, device_domain_list); + pdev-dev.archdata.iommu = info; + spin_unlock_irqrestore(device_domain_lock, flags); + + return 0; +} + +static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev) +{ + struct device_domain_info *info; + struct intel_iommu *iommu; + unsigned long flags; + int found = 0; + + iommu = device_find_matched_iommu(pdev-bus-number, pdev-devfn); + + spin_lock_irqsave(device_domain_lock, flags); + while (!list_empty(domain-devices)) { + info = list_entry(domain-devices.next, + struct device_domain_info, link); + if (info-bus
[PATCH 07/13] add domain_flush_cache
For some common low level functions which will be also used by virtual machine usage, use domain_flush_cache instead of __iommu_flush_cache. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 40 1 files changed, 24 insertions(+), 16 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 429aff4..b00a8f2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -200,6 +200,13 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) return NULL; } +static void domain_flush_cache(struct dmar_domain *domain, + void *addr, int size) +{ + if (!domain-iommu_coherency) + clflush_cache_range(addr, size); +} + static struct intel_iommu *device_find_matched_iommu(u8 bus, u8 devfn) { struct dmar_drhd_unit *drhd = NULL; @@ -316,7 +323,6 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) int level = agaw_to_level(domain-agaw); int offset; unsigned long flags; - struct intel_iommu *iommu = domain_get_iommu(domain); BUG_ON(!domain-pgd); @@ -340,8 +346,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) flags); return NULL; } - __iommu_flush_cache(iommu, tmp_page, - PAGE_SIZE); + domain_flush_cache(domain, tmp_page, PAGE_SIZE); dma_set_pte_addr(*pte, virt_to_phys(tmp_page)); /* * high level table always sets r/w, last level page @@ -349,7 +354,7 @@ static struct dma_pte * addr_to_dma_pte(struct dmar_domain *domain, u64 addr) */ dma_set_pte_readable(*pte); dma_set_pte_writable(*pte); - __iommu_flush_cache(iommu, pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } parent = phys_to_virt(dma_pte_addr(*pte)); level--; @@ -386,14 +391,13 @@ static struct dma_pte *dma_addr_level_pte(struct dmar_domain *domain, u64 addr, static void dma_pte_clear_one(struct dmar_domain *domain, u64 addr) { struct dma_pte *pte = NULL; - struct intel_iommu *iommu = domain_get_iommu(domain); /* get last level pte */ pte = 
dma_addr_level_pte(domain, addr, 1); if (pte) { dma_clear_pte(*pte); - __iommu_flush_cache(iommu, pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } } @@ -422,7 +426,6 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int addr_width = agaw_to_width(domain-agaw); struct dma_pte *pte; int total = agaw_to_level(domain-agaw); - struct intel_iommu *iommu = domain_get_iommu(domain); int level; u64 tmp; @@ -442,8 +445,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, free_pgtable_page( phys_to_virt(dma_pte_addr(*pte))); dma_clear_pte(*pte); - __iommu_flush_cache(iommu, - pte, sizeof(*pte)); + domain_flush_cache(domain, pte, sizeof(*pte)); } tmp += level_size(level); } @@ -1158,12 +1160,16 @@ static int domain_context_mapping_one(struct dmar_domain *domain, u8 bus, u8 devfn) { struct context_entry *context; - struct intel_iommu *iommu = domain_get_iommu(domain); + struct intel_iommu *iommu; unsigned long flags; pr_debug(Set context mapping for %02x:%02x.%d\n, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); BUG_ON(!domain-pgd); + + iommu = device_find_matched_iommu(bus, devfn); + if (!iommu) + return -ENODEV; context = device_to_context_entry(iommu, bus, devfn); if (!context) return -ENOMEM; @@ -1225,12 +1231,15 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) tmp-bus-number, tmp-devfn); } -static int domain_context_mapped(struct dmar_domain *domain, - struct pci_dev *pdev) +static int domain_context_mapped(struct pci_dev *pdev) { int ret; struct pci_dev *tmp, *parent; - struct intel_iommu *iommu = domain_get_iommu(domain); + struct intel_iommu *iommu; + + iommu = device_find_matched_iommu(pdev-bus-number, pdev-devfn); + if (!iommu) + return -ENODEV; ret = device_context_mapped(iommu,
[PATCH 08/13] allocation and free functions of virtual machine domain
Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 104 - 1 files changed, 103 insertions(+), 1 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index b00a8f2..e96b3bc 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -947,6 +947,7 @@ static int iommu_init_domains(struct intel_iommu *iommu) static void domain_exit(struct dmar_domain *domain); +static void vm_domain_exit(struct dmar_domain *domain); void free_dmar_iommu(struct intel_iommu *iommu) { @@ -957,8 +958,13 @@ void free_dmar_iommu(struct intel_iommu *iommu) for (; i cap_ndoms(iommu-cap); ) { domain = iommu-domains[i]; clear_bit(i, iommu-domain_ids); - if (--domain-iommu_count == 0) + + if (domain-flags DOMAIN_FLAG_VIRTUAL_MACHINE) { + if (--domain-iommu_count == 0) + vm_domain_exit(domain); + } else domain_exit(domain); + i = find_next_bit(iommu-domain_ids, cap_ndoms(iommu-cap), i+1); } @@ -2492,6 +2498,102 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(device_domain_lock, flags); } +/* domain id for virtual machine, it won't be set in context */ +static unsigned long vm_domid; + +static struct dmar_domain *iommu_alloc_vm_domain(void) +{ + struct dmar_domain *domain; + + domain = alloc_domain_mem(); + if (!domain) + return NULL; + + domain-id = vm_domid++; + domain-iommu_count = 0; + domain-iommu_coherency = 0; + memset(domain-iommu_bmp, 0, sizeof(unsigned long)); + domain-flags = DOMAIN_FLAG_VIRTUAL_MACHINE; + + return domain; +} + +static int vm_domain_init(struct dmar_domain *domain, int guest_width) +{ + int adjust_width; + + init_iova_domain(domain-iovad, DMA_32BIT_PFN); + spin_lock_init(domain-mapping_lock); + + domain_reserve_special_ranges(domain); + + /* calculate AGAW */ + domain-gaw = guest_width; + adjust_width = guestwidth_to_adjustwidth(guest_width); + domain-agaw = width_to_agaw(adjust_width); + + INIT_LIST_HEAD(domain-devices); + + /* always allocate the top pgd */ 
+ domain-pgd = (struct dma_pte *)alloc_pgtable_page(); + if (!domain-pgd) + return -ENOMEM; + domain_flush_cache(domain, domain-pgd, PAGE_SIZE); + return 0; +} + +static void iommu_free_vm_domain(struct dmar_domain *domain) +{ + unsigned long flags; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + unsigned long i; + unsigned long ndomains; + + for_each_drhd_unit(drhd) { + if (drhd-ignored) + continue; + iommu = drhd-iommu; + + ndomains = cap_ndoms(iommu-cap); + i = find_first_bit(iommu-domain_ids, ndomains); + for (; i ndomains; ) { + if (iommu-domains[i] == domain) { + spin_lock_irqsave(iommu-lock, flags); + clear_bit(i, iommu-domain_ids); + iommu-domains[i] = NULL; + spin_unlock_irqrestore(iommu-lock, flags); + break; + } + i = find_next_bit(iommu-domain_ids, ndomains, i+1); + } + } +} + +static void vm_domain_exit(struct dmar_domain *domain) +{ + u64 end; + + /* Domain 0 is reserved, so dont process it */ + if (!domain) + return; + + vm_domain_remove_all_dev_info(domain); + /* destroy iovas */ + put_iova_domain(domain-iovad); + end = DOMAIN_MAX_ADDR(domain-gaw); + end = end (~VTD_PAGE_MASK); + + /* clear ptes */ + dma_pte_clear_range(domain, 0, end); + + /* free page tables */ + dma_pte_free_pagetable(domain, 0, end); + + iommu_free_vm_domain(domain); + free_domain_mem(domain); +} + void intel_iommu_domain_exit(struct dmar_domain *domain) { u64 end; -- 1.5.1 0008-allocation-and-free-functions-of-virtual-machine-do.patch Description: 0008-allocation-and-free-functions-of-virtual-machine-do.patch
[PATCH 09/13] change domain_context_mapping_one for virtual machine domain
vm_domid won't be set in context, find available domain id for a device from its iommu. For a virtual machine domain, a default agaw will be set, and skip top levels of page tables for iommu which has less agaw than default. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 57 ++-- 1 files changed, 54 insertions(+), 3 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index e96b3bc..3f987d7 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -1168,6 +1168,11 @@ static int domain_context_mapping_one(struct dmar_domain *domain, struct context_entry *context; struct intel_iommu *iommu; unsigned long flags; + struct dma_pte *pgd; + unsigned long num; + unsigned long ndomains; + int id; + int agaw; pr_debug(Set context mapping for %02x:%02x.%d\n, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); @@ -1185,9 +1190,55 @@ static int domain_context_mapping_one(struct dmar_domain *domain, return 0; } - context_set_domain_id(*context, domain-id); - context_set_address_width(*context, domain-agaw); - context_set_address_root(*context, virt_to_phys(domain-pgd)); + id = domain-id; + pgd = domain-pgd; + + if (domain-flags DOMAIN_FLAG_VIRTUAL_MACHINE) { + int found = 0; + + /* find an available domain id for this device in iommu */ + ndomains = cap_ndoms(iommu-cap); + num = find_first_bit(iommu-domain_ids, ndomains); + for (; num ndomains; ) { + if (iommu-domains[num] == domain) { + id = num; + found = 1; + break; + } + num = find_next_bit(iommu-domain_ids, + cap_ndoms(iommu-cap), num+1); + } + + if (found == 0) { + num = find_first_zero_bit(iommu-domain_ids, ndomains); + if (num = ndomains) { + spin_unlock_irqrestore(iommu-lock, flags); + printk(KERN_ERR IOMMU: no free domain ids\n); + return -EFAULT; + } + + set_bit(num, iommu-domain_ids); + iommu-domains[num] = domain; + id = num; + } + + /* Skip top levels of page tables for +* iommu which has less agaw than default. 
+*/ + for (agaw = domain-agaw; agaw != iommu-agaw; agaw--) { + pgd = phys_to_virt(dma_pte_addr(*pgd)); + if (!dma_pte_present(*pgd)) { + spin_unlock_irqrestore(iommu-lock, flags); + return -ENOMEM; + } + } + + set_bit(iommu-seq_id, domain-iommu_bmp); + } + + context_set_domain_id(*context, id); + context_set_address_width(*context, iommu-agaw); + context_set_address_root(*context, virt_to_phys(pgd)); context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); context_set_fault_enable(*context); context_set_present(*context); -- 1.5.1 0009-change-domain_context_mapping_one-for-virtual-machin.patch Description: 0009-change-domain_context_mapping_one-for-virtual-machin.patch
[PATCH 10/13] change intel iommu APIs
These APIs will be used by kvm VT-d. The domain used by these APIs is virtual machine domain (domain flag is DOMAIN_FLAG_VIRTUAL_MACHINE). Signed-off-by: Weidong Han [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 128 --- include/linux/intel-iommu.h | 20 --- 2 files changed, 71 insertions(+), 77 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 3f987d7..0db77e2 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2645,93 +2645,87 @@ static void vm_domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -void intel_iommu_domain_exit(struct dmar_domain *domain) +struct dmar_domain *intel_iommu_alloc_domain(void) { - u64 end; - - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; - - end = DOMAIN_MAX_ADDR(domain-gaw); - end = end (~VTD_PAGE_MASK); - - /* clear ptes */ - dma_pte_clear_range(domain, 0, end); - - /* free page tables */ - dma_pte_free_pagetable(domain, 0, end); - - iommu_free_domain(domain); - free_domain_mem(domain); -} -EXPORT_SYMBOL_GPL(intel_iommu_domain_exit); - -struct dmar_domain *intel_iommu_domain_alloc(struct pci_dev *pdev) -{ - struct dmar_drhd_unit *drhd; struct dmar_domain *domain; - struct intel_iommu *iommu; - drhd = dmar_find_matched_drhd_unit(pdev); - if (!drhd) { - printk(KERN_ERR intel_iommu_domain_alloc: drhd == NULL\n); - return NULL; - } - - iommu = drhd-iommu; - if (!iommu) { - printk(KERN_ERR - intel_iommu_domain_alloc: iommu == NULL\n); - return NULL; - } - domain = iommu_alloc_domain(iommu); + domain = iommu_alloc_vm_domain(); if (!domain) { printk(KERN_ERR intel_iommu_domain_alloc: domain == NULL\n); return NULL; } - if (domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR intel_iommu_domain_alloc: domain_init() failed\n); - intel_iommu_domain_exit(domain); + vm_domain_exit(domain); return NULL; } + return domain; } -EXPORT_SYMBOL_GPL(intel_iommu_domain_alloc); 
+EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain); -int intel_iommu_context_mapping( - struct dmar_domain *domain, struct pci_dev *pdev) +void intel_iommu_free_domain(struct dmar_domain *domain) { - int rc; - rc = domain_context_mapping(domain, pdev); - return rc; + vm_domain_exit(domain); } -EXPORT_SYMBOL_GPL(intel_iommu_context_mapping); +EXPORT_SYMBOL_GPL(intel_iommu_free_domain); -int intel_iommu_page_mapping( - struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +int intel_iommu_attach_device(struct dmar_domain *domain, + struct pci_dev *pdev) { - int rc; - rc = domain_page_mapping(domain, iova, hpa, size, prot); - return rc; + int ret; + + /* normally pdev is not mapped */ + if (unlikely(domain_context_mapped(pdev))) { + struct dmar_domain *old_domain; + + old_domain = find_domain(pdev); + if (old_domain) { + if (domain-flags DOMAIN_FLAG_VIRTUAL_MACHINE) + vm_domain_remove_one_dev_info(old_domain, pdev); + else + domain_remove_dev_info(old_domain); + } + } + + ret = domain_context_mapping(domain, pdev); + if (ret) + return ret; + + ret = vm_domain_add_dev_info(domain, pdev); + return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_page_mapping); +EXPORT_SYMBOL_GPL(intel_iommu_attach_device); -void intel_iommu_detach_dev(struct dmar_domain *domain, u8 bus, u8 devfn) +void intel_iommu_detach_device(struct dmar_domain *domain, + struct pci_dev *pdev) { - iommu_detach_dev(bus, devfn); + vm_domain_remove_one_dev_info(domain, pdev); } -EXPORT_SYMBOL_GPL(intel_iommu_detach_dev); +EXPORT_SYMBOL_GPL(intel_iommu_detach_device); -struct dmar_domain * -intel_iommu_find_domain(struct pci_dev *pdev) +int intel_iommu_map_address(struct dmar_domain *domain, dma_addr_t iova, + u64 hpa, size_t size, int prot) { - return find_domain(pdev); + int ret; + ret = domain_page_mapping(domain, iova, hpa, size, prot); + return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_find_domain); +EXPORT_SYMBOL_GPL(intel_iommu_map_address); + +void intel_iommu_unmap_address(struct dmar_domain 
*domain, + dma_addr_t iova, size_t size) +{ + dma_addr_t base; + +
[PATCH 13/13] KVM: support device assignment
Support device assignment, it can be used in device hotplug. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- include/linux/kvm.h |5 + include/linux/kvm_host.h |8 virt/kvm/kvm_main.c | 42 ++ virt/kvm/vtd.c | 24 4 files changed, 79 insertions(+), 0 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0997e6f..2904276 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -395,6 +395,9 @@ struct kvm_trace_rec { #if defined(CONFIG_X86) #define KVM_CAP_DEVICE_MSI 20 #endif +#if defined(CONFIG_X86)||defined(CONFIG_IA64) +#define KVM_CAP_DEVICE_DEASSIGNMENT 21 +#endif /* * ioctls for VM fds @@ -428,6 +431,8 @@ struct kvm_trace_rec { struct kvm_assigned_pci_dev) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \ +struct kvm_assigned_pci_dev) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index aeabd32..cb1d404 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -333,6 +333,8 @@ int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_DMAR */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, @@ -356,6 +358,12 @@ static inline int kvm_assign_device(struct kvm *kvm, { return 0; } + +static inline int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} #endif /* CONFIG_DMAR */ static inline void kvm_guest_enter(void) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 43a5236..fe6aba0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -499,6 +499,35 @@ out_free: } #endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ 
+ int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(kvm-lock); + + match = kvm_find_assigned_dev(kvm-arch.assigned_dev_head, + assigned_dev-assigned_dev_id); + if (!match) { + printk(KERN_INFO %s: device hasn't been assigned before, + so cannot be deassigned\n, __func__); + r = -EINVAL; + goto out; + } + + if (assigned_dev-flags KVM_DEV_ASSIGN_ENABLE_IOMMU) + kvm_deassign_device(kvm, match); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(kvm-lock); + return r; +} +#endif + static inline int valid_vcpu(int n) { return likely(n = 0 n KVM_MAX_VCPUS); @@ -1838,6 +1867,19 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = -EFAULT; + if (copy_from_user(assigned_dev, argp, sizeof assigned_dev)) + goto out; + r = kvm_vm_ioctl_deassign_device(kvm, assigned_dev); + if (r) + goto out; + break; + } +#endif default: r = kvm_arch_vm_ioctl(filp, ioctl, arg); } diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c index 44bb58a..174ea1f 100644 --- a/virt/kvm/vtd.c +++ b/virt/kvm/vtd.c @@ -116,6 +116,30 @@ int kvm_assign_device(struct kvm *kvm, return 0; } +int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + struct dmar_domain *domain = kvm-arch.intel_iommu_domain; + struct pci_dev *pdev = NULL; + + /* check if iommu exists and in use */ + if (!domain) + return 0; + + pdev = assigned_dev-dev; + if (pdev == NULL) + return -ENODEV; + + intel_iommu_detach_device(domain, pdev); + + printk(KERN_DEBUG deassign device: host bdf = %x:%x:%x\n, + assigned_dev-host_busnr, + PCI_SLOT(assigned_dev-host_devfn), + PCI_FUNC(assigned_dev-host_devfn)); + + return 0; +} + int kvm_iommu_map_guest(struct kvm *kvm) { int r; -- 1.5.1 0013-KVM-support-device-assignment.patch Description: 0013-KVM-support-device-assignment.patch
[PATCH 12/13] KVM: use the new intel iommu APIs
intel iommu APIs are updated, use the new APIs. In addition, change kvm_iommu_map_guest() to just create the domain, let kvm_iommu_assign_device() assign device. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- include/linux/kvm_host.h | 15 +-- virt/kvm/kvm_main.c |7 +++- virt/kvm/vtd.c | 98 ++ 3 files changed, 71 insertions(+), 49 deletions(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8091a4d..aeabd32 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -329,9 +329,10 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); +int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_DMAR */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, @@ -340,9 +341,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm, return 0; } -static inline int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel - *assigned_dev) +static inline int kvm_iommu_map_guest(struct kvm *kvm) { return -ENODEV; } @@ -351,6 +350,12 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm) { return 0; } + +static inline int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} #endif /* CONFIG_DMAR */ static inline void kvm_guest_enter(void) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8dab7ce..43a5236 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -472,7 +472,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(match-list, kvm-arch.assigned_dev_head); if (assigned_dev-flags KVM_DEV_ASSIGN_ENABLE_IOMMU) { - r = kvm_iommu_map_guest(kvm, match); + if (!kvm-arch.intel_iommu_domain) { + r = 
kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match); if (r) goto out_list_del; } diff --git a/virt/kvm/vtd.c b/virt/kvm/vtd.c index a770874..44bb58a 100644 --- a/virt/kvm/vtd.c +++ b/virt/kvm/vtd.c @@ -45,20 +45,18 @@ int kvm_iommu_map_pages(struct kvm *kvm, for (i = 0; i npages; i++) { /* check if already mapped */ - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, -gfn_to_gpa(gfn)); - if (pfn) + if (intel_iommu_iova_to_phys(domain, +gfn_to_gpa(gfn))) continue; pfn = gfn_to_pfn(kvm, gfn); - r = intel_iommu_page_mapping(domain, -gfn_to_gpa(gfn), -pfn_to_hpa(pfn), -PAGE_SIZE, -DMA_PTE_READ | -DMA_PTE_WRITE); + r = intel_iommu_map_address(domain, + gfn_to_gpa(gfn), + pfn_to_hpa(pfn), + PAGE_SIZE, + DMA_PTE_READ | DMA_PTE_WRITE); if (r) { - printk(KERN_ERR kvm_iommu_map_pages: + printk(KERN_ERR kvm_iommu_map_address: iommu failed to map pfn=%lx\n, pfn); goto unmap_pages; } @@ -86,50 +84,55 @@ static int kvm_iommu_map_memslots(struct kvm *kvm) return r; } -int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev) +int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) { struct pci_dev *pdev = NULL; + struct dmar_domain *domain = kvm-arch.intel_iommu_domain; int r; - if (!intel_iommu_found()) { - printk(KERN_ERR %s: intel iommu not found\n, __func__); + /* check if iommu exists and in use */ + if (!domain) + return 0; + + pdev = assigned_dev-dev; + if (pdev == NULL) return -ENODEV; + + r = intel_iommu_attach_device(domain, pdev); + if (r) { +
[PATCH 2/2] kvm: set owner of cpu and vm file operations
There is a race between a close of the file descriptors and module unload in the kvm module. You can easily trigger this problem by applying this debug patch: --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -648,10 +648,14 @@ void kvm_free_physmem(struct kvm *kvm) kvm_free_physmem_slot(&kvm->memslots[i], NULL); } +#include <linux/delay.h> static void kvm_destroy_vm(struct kvm *kvm) { struct mm_struct *mm = kvm->mm; + printk("off1\n"); + msleep(5000); + printk("off2\n"); spin_lock(&kvm_lock); list_del(&kvm->vm_list); spin_unlock(&kvm_lock); and killing the userspace process, followed by an rmmod. The problem is that kvm_destroy_vm can run while the module refcount is 0. That means you can remove the module while kvm_destroy_vm is running. But kvm_destroy_vm is part of the module text. This causes a kernel oops. The race exists without the msleep but is much harder to trigger. This patch requires the fix for anon_inodes (anon_inodes: use fops->owner for module refcount). With this patch, we can set the owner of the file operations of all anonymous KVM inodes. The VFS will then control the KVM module refcount as long as there is an open file. kvm_destroy_vm will be called by the release function of the last closed file - before the VFS drops the module refcount. 
Signed-off-by: Christian Borntraeger [EMAIL PROTECTED] --- virt/kvm/kvm_main.c |6 -- 1 file changed, 4 insertions(+), 2 deletions(-) Index: kvm/virt/kvm/kvm_main.c === --- kvm.orig/virt/kvm/kvm_main.c +++ kvm/virt/kvm/kvm_main.c @@ -1303,7 +1303,7 @@ static int kvm_vcpu_release(struct inode return 0; } -static const struct file_operations kvm_vcpu_fops = { +static struct file_operations kvm_vcpu_fops = { .release= kvm_vcpu_release, .unlocked_ioctl = kvm_vcpu_ioctl, .compat_ioctl = kvm_vcpu_ioctl, @@ -1697,7 +1697,7 @@ static int kvm_vm_mmap(struct file *file return 0; } -static const struct file_operations kvm_vm_fops = { +static struct file_operations kvm_vm_fops = { .release= kvm_vm_release, .unlocked_ioctl = kvm_vm_ioctl, .compat_ioctl = kvm_vm_ioctl, @@ -2061,6 +2061,8 @@ int kvm_init(void *opaque, unsigned int } kvm_chardev_ops.owner = module; + kvm_vm_fops.owner = module; + kvm_vcpu_fops.owner = module; r = misc_register(kvm_dev); if (r) { -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Hangs
The same guest did it again. # uname -a Linux spaceball 2.6.27.6 #1 SMP Fri Nov 14 11:51:10 CET 2008 i686 QEMU Virtual CPU version 0.9.1 AuthenticAMD GNU/Linux # date Thu Dec 19 01:54:27 WET 1912 # uptime 01:54:29 up 14666 days, 21:17, 12 users, load average: 3.99, 3.97, 3.91 What can I do to provide more info? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 12/12] VT-d: register functions for the IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 13 + 1 files changed, 13 insertions(+), 0 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 2e8b102..bb6f771 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -55,6 +55,7 @@ static void flush_unmaps_timeout(unsigned long data); +static struct iommu_ops intel_iommu_ops; DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); @@ -2429,6 +2430,9 @@ int __init intel_iommu_init(void) init_timer(unmap_timer); force_iommu = 1; dma_ops = intel_dma_ops; + + register_iommu(intel_iommu_ops); + return 0; } @@ -2929,3 +2933,12 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return paddr; } +static struct iommu_ops intel_iommu_ops = { + .domain_init= intel_iommu_domain_init, + .domain_destroy = intel_iommu_domain_destroy, + .attach_dev = intel_iommu_attach_device, + .detach_dev = intel_iommu_detach_device, + .map= intel_iommu_map, + .unmap = intel_iommu_unmap, + .iova_to_phys = intel_iommu_iova_to_phys, +}; -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/12] VT-d: adapt domain iova_to_phys function for IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 15 --- include/linux/intel-iommu.h |2 -- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index ac22973..2e8b102 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2913,18 +2913,19 @@ int intel_iommu_found(void) } EXPORT_SYMBOL_GPL(intel_iommu_found); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) +static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, + unsigned long iova) { + struct dmar_domain *dmar_domain = domain-priv; struct dma_pte *pte; - u64 pfn; + phys_addr_t paddr; - pfn = 0; - pte = addr_to_dma_pte(domain, iova); + paddr = 0; + pte = addr_to_dma_pte(dmar_domain, iova); if (pte) - pfn = dma_pte_addr(*pte); + paddr = dma_pte_addr(*pte); - return pfn VTD_PAGE_SHIFT; + return paddr; } -EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ac79a1c..469508f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); - #ifdef CONFIG_DMAR int intel_iommu_found(void); #else /* CONFIG_DMAR */ -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2318236 ] SCSI debug
Bugs item #2318236, was opened at 2008-11-20 13:41 Message generated for change (Comment added) made by ryandbair You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2318236&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Closed Resolution: Duplicate Priority: 5 Private: No Submitted By: Ryan Bair (ryandbair) Assigned to: Nobody/Anonymous (nobody) Summary: SCSI debug Initial Comment: Here is the stdout with SCSI_DEBUG enabled. The guest is Windows Server 2003 R2 x64 with an emulated scsi device being served from a 36GB raw file on a Debian Lenny host with KVM-79. I get the mentioned error on both quick and full format. Let me know if there is anything else I can provide that would be of assistance. -- Comment By: Ryan Bair (ryandbair) Date: 2008-12-02 09:52 Message: Sorry, I meant to reply to bug 2171940 but got a bit confused while attempting to attach a file. I'm seeing the same issue as in that bug. Setup is extremely unstable with SCSI drives and formatting the drive always results in an error saying that the drive could not be formatted and it may be faulty. I've also tried preformatting the drive, in that case it eventually gives a BSOD while copying files with ntfs.sys being the faulting module. I've retried the setup about 6 times and the results are consistent. -- Comment By: Avi Kivity (avik) Date: 2008-11-23 13:46 Message: What exactly is the problem you're seeing? -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2318236&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/5] KVM: remove the IRQ ACK notifier assertions
Mark McLoughlin wrote: We will obviously never pass a NULL struct kvm_irq_ack_notifier* to these functions. They are always embedded in the assigned device structure, so the assertions add nothing. The irqchip_in_kernel() assertion is very out of place - clearly this little abstraction needs to know nothing about the upper layer details. Applied all, thanks. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] anon_inodes: use fops->owner for module refcount
There is an imbalance for anonymous inodes. If the fops->owner field is set, the module reference count of owner is decreased on release. (filp_close --> __fput ---> fops_put) On the other hand, anon_inode_getfd does not increase the module reference count of owner. This causes two problems: - if owner is set, the module refcount goes negative - if owner is not set, the module can be unloaded while code is running This patch changes anon_inode_getfd to be symmetric regarding fops->owner handling. I have checked all existing users of anon_inode_getfd. No one sets fops->owner, that's why nobody has seen the module refcount go negative. The refcounting was tested with a patched and unpatched KVM module (see patch 2/2). I also did an epoll_open/close test. Signed-off-by: Christian Borntraeger [EMAIL PROTECTED] Reviewed-by: Davide Libenzi [EMAIL PROTECTED] --- fs/anon_inodes.c |7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) Index: kvm/fs/anon_inodes.c === --- kvm.orig/fs/anon_inodes.c +++ kvm/fs/anon_inodes.c @@ -79,9 +79,12 @@ int anon_inode_getfd(const char *name, c if (IS_ERR(anon_inode_inode)) return -ENODEV; + if (fops->owner && !try_module_get(fops->owner)) + return -ENOENT; + error = get_unused_fd_flags(flags); if (error < 0) - return error; + goto err_module; fd = error; /* @@ -128,6 +131,8 @@ err_dput: dput(dentry); err_put_unused_fd: put_unused_fd(fd); +err_module: + module_put(fops->owner); return error; } EXPORT_SYMBOL_GPL(anon_inode_getfd); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] module_refcounting and anonymous inodes
Hello Avi, here is the latest respin of my fixes for the kvm module unload problem: [PATCH 1/2] anon_inodes: use fops->owner for module refcount [PATCH 2/2] kvm: set owner of cpu and vm file operations Both patches fix module reference counting problems and only matter for module unload - nothing critical. Tested on s390 and x86_32. Christian -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/12] VT-d: adapt domain map and unmap functions for IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 22 +++--- include/linux/intel-iommu.h |4 2 files changed, 11 insertions(+), 15 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 62ae6b1..ac22973 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -2864,20 +2864,21 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, vm_domain_remove_one_dev_info(dmar_domain, pdev); } -int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot) +static int intel_iommu_map(struct iommu_domain *domain, unsigned long iova, + phys_addr_t hpa, size_t size, int prot) { + struct dmar_domain *dmar_domain = domain-priv; u64 max_addr; int addr_width; int ret; max_addr = (iova VTD_PAGE_MASK) + VTD_PAGE_ALIGN(size); - if (domain-max_addr max_addr) { + if (dmar_domain-max_addr max_addr) { int min_agaw; u64 end; /* check if minimum agaw is sufficient for mapped address */ - min_agaw = vm_domain_min_agaw(domain); + min_agaw = vm_domain_min_agaw(dmar_domain); addr_width = agaw_to_width(min_agaw); end = DOMAIN_MAX_ADDR(addr_width); end = end VTD_PAGE_MASK; @@ -2887,25 +2888,24 @@ int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova, __func__, min_agaw, max_addr); return -EFAULT; } - domain-max_addr = max_addr; + dmar_domain-max_addr = max_addr; } - ret = domain_page_mapping(domain, iova, hpa, size, prot); + ret = domain_page_mapping(dmar_domain, iova, hpa, size, prot); return ret; } -EXPORT_SYMBOL_GPL(intel_iommu_map_pages); -void intel_iommu_unmap_pages(struct dmar_domain *domain, -dma_addr_t iova, size_t size) +static void intel_iommu_unmap(struct iommu_domain *domain, + unsigned long iova, size_t size) { + struct dmar_domain *dmar_domain = domain-priv; dma_addr_t base; /* The address might not be aligned */ base = iova PAGE_MASK; size = PAGE_ALIGN(size); - dma_pte_clear_range(domain, base, base + size); + 
dma_pte_clear_range(dmar_domain, base, base + size); } -EXPORT_SYMBOL_GPL(intel_iommu_unmap_pages); int intel_iommu_found(void) { diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 41d2a3b..ac79a1c 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -337,10 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -int intel_iommu_map_pages(struct dmar_domain *domain, dma_addr_t iova, - u64 hpa, size_t size, int prot); -void intel_iommu_unmap_pages(struct dmar_domain *domain, -dma_addr_t iova, size_t size); u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova); #ifdef CONFIG_DMAR -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[ kvm-Bugs-2353510 ] Fedora 10 failures
Bugs item #2353510, was opened at 2008-11-27 14:46 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353510&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Technologov (technologov) Assigned to: Nobody/Anonymous (nobody) Summary: Fedora 10 failures Initial Comment: Description: Fedora 10 fails to install on KVM. (KVM-79) The DVD version gets stuck near the end of the setup stage, when trying to install the GRUB bootloader onto the HDD. It didn't proceed within one hour, which indicates a stuck VM. Sometimes it may get stuck earlier - during init or during early setup. Live CD (32-bit) started fine on both Intel and AMD. (except for a minor top menu rendering bug) Guest(s): Fedora 10 64-bit Guest(s): Fedora 10 32-bit Host(s): Fedora 7 64-bit, Intel, KVM-79 Host(s): Fedora 7 64-bit, AMD, KVM-79 Command: (for DVD) qemu-kvm -cdrom /isos/linux/Fedora-10-x86_64-DVD.iso -m 512 -hda /vm/f10-64.qcow2 -boot d *and* (for LiveCD) qemu-kvm -cdrom /isos/linux/F10-i686-Live.iso -m 512 -Alexey, 27.11.2008. -- Comment By: Technologov (technologov) Date: 2008-12-02 12:39 Message: I have opened a similar bug against Fedora 10 bugzilla: https://bugzilla.redhat.com/show_bug.cgi?id=474116 -Alexey -- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2353510&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: STOP error with virtio on KVM-79/2.6.18/Win2k3 x64 guest
-Original Message- From: Dor Laor [mailto:[EMAIL PROTECTED] Sent: Monday, December 01, 2008 5:27 PM To: Adrian Schmitz Cc: kvm@vger.kernel.org; Avi Kivity Subject: Re: STOP error with virtio on KVM-79/2.6.18/Win2k3 x64 guest What driver version are you using? Version 2 is obsolete. I posted ver 3 few months ago, Avi can you please upload it to sourceforge. My old public space was blocked so I'll send you a private attachment to test. Dor. Okay, I received the version 3 drivers and installed them on my guest. This fixed the STOP errors. I can now run the same iperf tests without any crashes. The only problem I have now is that the virtio seems to be slower for me than e1000. Running iperf between the guest machine and the bridge interface on the host using the e1000 driver consistently gave measurements of around 320 Mb/s. With the new virtio drivers, I'm getting roughly 120 Mb/s. I'm not sure if I'm missing something simple. I tried changing the connection rate setting in the guest from the default 100M to 1G, but that didn't seem to help. I also tried using a tcp window size of 16k instead of the guest os default 8k, but that didn't make much of a difference, either. Below is some information about my setup. Please let me know if there's any other info I can provide, and thanks again for your help. 
Host Dist: CentOS 5.2 Host Kernel:2.6.18 Host Hardware: Dual AMD Quad-Core, 8G memory KVM Version:79 (modules and tools built and installed from source) Guest OS: Windows 2003 Server x64 SP2 (with all critical updates) Guest VCPUs:2 Guest memory: 4G KVM command used: /usr/bin/kvm -S -M pc -m 4096 -smp 2 -name bth-host00 -uuid 84d941a5-126e-d001-6029-d7d434a7dad6 -monitor pty -localtime -boot c -drive file=/dev/mapper/sys_bth-host00,if=ide,index=0,boot=on -drive file=/isoimages/netkvm.iso,if=ide,media=cdrom,index=2 -net nic,macaddr=00:16:3e:5d:a7:46,vlan=0,model=virtio -net tap,fd=19,script=,vlan=0,ifname=vnet1 -serial none -parallel none -usb -usbdevice tablet -vnc 127.0.0.1:1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/12] KVM: change KVM iommu.c to use IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- arch/ia64/kvm/Makefile |2 +- arch/x86/include/asm/kvm_host.h |3 +- arch/x86/kvm/Makefile |2 +- virt/kvm/iommu.c| 68 --- virt/kvm/kvm_main.c |2 +- 5 files changed, 40 insertions(+), 37 deletions(-) diff --git a/arch/ia64/kvm/Makefile b/arch/ia64/kvm/Makefile index cb69dfc..0bb99b7 100644 --- a/arch/ia64/kvm/Makefile +++ b/arch/ia64/kvm/Makefile @@ -51,7 +51,7 @@ EXTRA_AFLAGS += -Ivirt/kvm -Iarch/ia64/kvm/ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ coalesced_mmio.o irq_comm.o) -ifeq ($(CONFIG_DMAR),y) +ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index f58f7eb..77f4afa 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -14,6 +14,7 @@ #include linux/types.h #include linux/mm.h #include linux/mmu_notifier.h +#include linux/iommu.h #include linux/kvm.h #include linux/kvm_para.h @@ -356,7 +357,7 @@ struct kvm_arch{ */ struct list_head active_mmu_pages; struct list_head assigned_dev_head; - struct dmar_domain *intel_iommu_domain; + struct iommu_domain *iommu_domain; struct kvm_pic *vpic; struct kvm_ioapic *vioapic; struct kvm_pit *vpit; diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile index 00f46c2..d3ec292 100644 --- a/arch/x86/kvm/Makefile +++ b/arch/x86/kvm/Makefile @@ -7,7 +7,7 @@ common-objs = $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \ ifeq ($(CONFIG_KVM_TRACE),y) common-objs += $(addprefix ../../../virt/kvm/, kvm_trace.o) endif -ifeq ($(CONFIG_DMAR),y) +ifeq ($(CONFIG_IOMMU_API),y) common-objs += $(addprefix ../../../virt/kvm/, iommu.o) endif diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 832ee04..110c455 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -16,15 +16,18 @@ * * Copyright (C) 2006-2008 Intel Corporation * Copyright IBM Corporation, 2008 + * Copyright (C) 2008 Advanced Micro Devices, Inc. 
* Author: Allen M. Kay [EMAIL PROTECTED] * Author: Weidong Han [EMAIL PROTECTED] * Author: Ben-Ami Yassour [EMAIL PROTECTED] + * Author: Joerg Roedel [EMAIL PROTECTED] */ #include linux/list.h #include linux/kvm_host.h #include linux/pci.h #include linux/dmar.h +#include linux/iommu.h #include linux/intel-iommu.h static int kvm_iommu_unmap_memslots(struct kvm *kvm); @@ -36,9 +39,9 @@ int kvm_iommu_map_pages(struct kvm *kvm, { gfn_t gfn = base_gfn; pfn_t pfn; - int r = 0; - unsigned long i; - struct dmar_domain *domain = kvm-arch.intel_iommu_domain; + phys_addr_t paddr; + int i, r = 0; + struct iommu_domain *domain = kvm-arch.iommu_domain; /* check if iommu exists and in use */ if (!domain) @@ -46,18 +49,13 @@ int kvm_iommu_map_pages(struct kvm *kvm, for (i = 0; i npages; i++) { /* check if already mapped */ - pfn = (pfn_t)intel_iommu_iova_to_pfn(domain, -gfn_to_gpa(gfn)); - if (pfn) + paddr = (pfn_t)iommu_iova_to_phys(domain, gfn_to_gpa(gfn)); + if (paddr) kvm_iommu_put_pages(kvm, gfn, 1); pfn = gfn_to_pfn(kvm, gfn); - r = intel_iommu_map_pages(domain, - gfn_to_gpa(gfn), - pfn_to_hpa(pfn), - PAGE_SIZE, - DMA_PTE_READ | - DMA_PTE_WRITE); + r = iommu_map_range(domain, gfn_to_gpa(gfn), pfn_to_hpa(pfn), + PAGE_SIZE, DMA_PTE_READ | DMA_PTE_WRITE); if (r) { printk(KERN_ERR kvm_iommu_map_pages: iommu failed to map pfn=%lx\n, pfn); @@ -91,7 +89,7 @@ int kvm_assign_device(struct kvm *kvm, struct kvm_assigned_dev_kernel *assigned_dev) { struct pci_dev *pdev = NULL; - struct dmar_domain *domain = kvm-arch.intel_iommu_domain; + struct iommu_domain *domain = kvm-arch.iommu_domain; int r; /* check if iommu exists and in use */ @@ -102,7 +100,12 @@ int kvm_assign_device(struct kvm *kvm, if (pdev == NULL) return -ENODEV; - r = intel_iommu_assign_device(domain, pdev); + if (!iommu_found()) { + printk(KERN_ERR %s: No IOMMU found\n, __func__); + return -ENODEV; + } + + r = iommu_attach_device(domain, pdev-dev); if (r) { printk(KERN_ERR assign device %x:%x.%x failed, pdev-bus-number, 
@@
[ kvm-Bugs-2088475 ] OpenSuse10.2 can not be installed
Bugs item #2088475, was opened at 2008-09-02 11:37 Message generated for change (Comment added) made by technologov You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599 Please note that this message will contain a full copy of the comment thread, including the initial issue submission, for this request, not just the latest update. Category: None Group: None Status: Open Resolution: None Priority: 5 Private: No Submitted By: Jiajun Xu (jiajun) Assigned to: Nobody/Anonymous (nobody) Summary: OpenSuse10.2 can not be installed Initial Comment: OpenSuse10.2 can not be installed on KVM. Installer will stop after loading ISOLinux. It is against latest kvm commit, kvm.git :5b9207ec01681337786c7898ffc0165ec4e7c2e4 userspace.git :5f2a9719f105e29fbde4529cf919a5351b05da9a. -- Comment By: Technologov (technologov) Date: 2008-12-02 16:06 Message: BTW: If you absolutely _must_ have openSUSE 10.2 there are workarounds that allow you to install it anyway. 1. Install 10.2 using Qemu, then disable bootloader -or- 2. Start VM, and press-n-hold shift during KVM's BIOS load. -- Comment By: Technologov (technologov) Date: 2008-12-02 15:58 Message: It crashed with old KVMs, but with newer ones it just gets stuck. It doesn't matter. And yes, openSUSE 11.0 is tested to work. -- Comment By: Jiajun Xu (jiajun) Date: 2008-10-16 17:23 Message: From the bug description, opensuse11.0 should work? And we did not hit a guest crash during installation; the guest hangs when loading grub and no error messages are printed. -- Comment By: Technologov (technologov) Date: 2008-10-16 17:04 Message: Known issue: https://sourceforge.net/tracker/index.php?func=detail&aid=1760424&group_id=180599&atid=893831 This bug is a duplicate. 
-- You can respond by visiting: https://sourceforge.net/tracker/?func=detail&atid=893831&aid=2088475&group_id=180599 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/12] select IOMMU_API when DMAR and/or AMD_IOMMU is selected
These two IOMMUs can implement the current version of this API. So select the API if one or both of these IOMMU drivers is selected. Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- arch/ia64/Kconfig |3 +++ arch/x86/Kconfig |3 +++ drivers/base/Makefile |1 + 3 files changed, 7 insertions(+), 0 deletions(-) diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 6bd91ed..6a7b0c9 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -687,3 +687,6 @@ config IRQ_PER_CPU config IOMMU_HELPER def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) + +config IOMMU_API + def_bool (DMAR) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index ac22bb7..b9f7187 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -580,6 +580,9 @@ config SWIOTLB config IOMMU_HELPER def_bool (CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU) +config IOMMU_API + def_bool (AMD_IOMMU || DMAR) + config MAXSMP bool Configure Maximum number of SMP Processors and NUMA Nodes depends on X86_64 SMP BROKEN diff --git a/drivers/base/Makefile b/drivers/base/Makefile index c666373..b5b8ba5 100644 --- a/drivers/base/Makefile +++ b/drivers/base/Makefile @@ -11,6 +11,7 @@ obj-$(CONFIG_FW_LOADER) += firmware_class.o obj-$(CONFIG_NUMA) += node.o obj-$(CONFIG_MEMORY_HOTPLUG_SPARSE) += memory.o obj-$(CONFIG_SMP) += topology.o +obj-$(CONFIG_IOMMU_API) += iommu.o ifeq ($(CONFIG_SYSFS),y) obj-$(CONFIG_MODULES) += module.o endif -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] kvm-userspace: Add missing KVM string in the signature of CPUID
It adds a missing KVM string in the signature of the CPUID. Without it signature[2] is not well defined. Signed-off-by: Guillaume Thouvenin [EMAIL PROTECTED] --- qemu/qemu-kvm-x86.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c index 671b5b3..e9b200a 100644 --- a/qemu/qemu-kvm-x86.c +++ b/qemu/qemu-kvm-x86.c @@ -573,7 +573,7 @@ int kvm_arch_qemu_init_env(CPUState *cenv) #ifdef KVM_CPUID_SIGNATURE /* Paravirtualization CPUIDs */ -memcpy(signature, KVMKVMKVM, 12); +memcpy(signature, KVMKVMKVMKVM, 12); pv_ent = cpuid_ent[cpuid_nent++]; memset(pv_ent, 0, sizeof(*pv_ent)); pv_ent-function = KVM_CPUID_SIGNATURE; -- 1.6.0.4.623.g171d7 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/12] VT-d: adapt domain init and destroy functions for IOMMU API
Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- drivers/pci/intel-iommu.c | 30 +- include/linux/intel-iommu.h |2 -- 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 7f12852..59b9cdb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -35,6 +35,7 @@ #include linux/mempool.h #include linux/timer.h #include linux/iova.h +#include linux/iommu.h #include linux/intel-iommu.h #include asm/cacheflush.h #include asm/iommu.h @@ -2779,32 +2780,34 @@ static struct dmar_domain *iommu_alloc_vm_domain(void) return domain; } -struct dmar_domain *intel_iommu_alloc_domain(void) +static int intel_iommu_domain_init(struct iommu_domain *domain) { - struct dmar_domain *domain; + struct dmar_domain *dmar_domain; - domain = iommu_alloc_vm_domain(); - if (!domain) { + dmar_domain = iommu_alloc_vm_domain(); + if (!dmar_domain) { printk(KERN_ERR intel_iommu_domain_alloc: domain == NULL\n); - return NULL; + return -ENOMEM; } - if (vm_domain_init(domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { + if (vm_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR intel_iommu_domain_alloc: domain_init() failed\n); - vm_domain_exit(domain); - return NULL; + vm_domain_exit(dmar_domain); + return -ENOMEM; } + domain-priv = dmar_domain; - return domain; + return 0; } -EXPORT_SYMBOL_GPL(intel_iommu_alloc_domain); -void intel_iommu_free_domain(struct dmar_domain *domain) +static void intel_iommu_domain_destroy(struct iommu_domain *domain) { - vm_domain_exit(domain); + struct dmar_domain *dmar_domain = domain-priv; + + domain-priv = NULL; + vm_domain_exit(dmar_domain); } -EXPORT_SYMBOL_GPL(intel_iommu_free_domain); int intel_iommu_assign_device(struct dmar_domain *domain, struct pci_dev *pdev) @@ -2922,3 +2925,4 @@ u64 intel_iommu_iova_to_pfn(struct dmar_domain *domain, u64 iova) return pfn VTD_PAGE_SHIFT; } EXPORT_SYMBOL_GPL(intel_iommu_iova_to_pfn); + diff --git a/include/linux/intel-iommu.h 
b/include/linux/intel-iommu.h index c2f37b8..5a4ce23 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -337,8 +337,6 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); -struct dmar_domain *intel_iommu_alloc_domain(void); -void intel_iommu_free_domain(struct dmar_domain *domain); int intel_iommu_assign_device(struct dmar_domain *domain, struct pci_dev *pdev); void intel_iommu_deassign_device(struct dmar_domain *domain, -- 1.5.6.4 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Kvm: Qemu: save nvram
On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote: This patch to save the nvram. It save the nvram by specify the arg of -name.And the saved file named by the arg. If do not specify the arg, it will not save the nvram I think we might be better off having an explicit command line arg for nvram path rather than hardcoding the directory, because there may well be times where you want to have nvram saved, but don't want to specify -name, and vice versa. -nvram foo.data could prepend a default directory of $localstatedir/lib/qemu/nvram, where $localstatedir is set from 'configure' script, or -nvram /some/path/foo.data would use the explicit path given. diff --git a/qemu/target-ia64/firmware.h b/qemu/target-ia64/firmware.h index 553a9f9..71aef2a 100644 --- a/qemu/target-ia64/firmware.h +++ b/qemu/target-ia64/firmware.h @@ -34,11 +34,27 @@ [..snip...] +#define NVRAM_DIR /usr/local/share/qemu/nvram/ This is definitely wrong. You cannot assume /usr/local as the install prefix, and using '$prefix/share' violates the FHS. '$prefix/share' is for readonly data that can be shared across machines, not variable runtime state data. I'd expect it to be in $localstatedir/lib/qemu/nvram, which would normally default to $prefix/var/lib/qemu/nvram, but for distro package builds typically be overridden to /var/lib/qemu/nvram. Daniel -- |: Red Hat, Engineering, London -o- http://people.redhat.com/berrange/ :| |: http://libvirt.org -o- http://virt-manager.org -o- http://ovirt.org :| |: http://autobuild.org -o- http://search.cpan.org/~danberr/ :| |: GnuPG: 7D3B9505 -o- F3C9 553F A1DA 4AC2 5648 23C1 B3DF F742 7D3B 9505 :| -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: splice() based interguest networking
Anthony Liguori wrote: 1) On TX, we vmsplice() from the sg buffer to one pipe. This will end up being vmsplice_to_pipe() in the kernel which is zero-copy. That implies we do the MAC address switching in userspace (or that this is a point-to-point protocol, which severely limits its usefulness). I think we can still have one-copy interguest networking if we have proper skb destructors; and since we need that for the more important guest-external copyless support, we basically get it for free (if delayed). -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/12] KVM: change to use new APIs for kvm vtd
From: Weidong Han [EMAIL PROTECTED] This patch changes to use new APIs for KVM VT-d, and add device deassignment for hotplug. [Joerg: coding style cleanups] Signed-off-by: Weidong Han [EMAIL PROTECTED] Signed-off-by: Joerg Roedel [EMAIL PROTECTED] --- include/linux/kvm.h |5 ++ include/linux/kvm_host.h | 23 -- virt/kvm/kvm_main.c | 49 - virt/kvm/vtd.c | 107 +++--- 4 files changed, 143 insertions(+), 41 deletions(-) diff --git a/include/linux/kvm.h b/include/linux/kvm.h index 0997e6f..49432e9 100644 --- a/include/linux/kvm.h +++ b/include/linux/kvm.h @@ -395,6 +395,9 @@ struct kvm_trace_rec { #if defined(CONFIG_X86) #define KVM_CAP_DEVICE_MSI 20 #endif +#if defined(CONFIG_X86) || defined(CONFIG_IA64) +#define KVM_CAP_DEVICE_DEASSIGNMENT 21 +#endif /* * ioctls for VM fds @@ -428,6 +431,8 @@ struct kvm_trace_rec { struct kvm_assigned_pci_dev) #define KVM_ASSIGN_IRQ _IOR(KVMIO, 0x70, \ struct kvm_assigned_irq) +#define KVM_DEASSIGN_PCI_DEVICE _IOR(KVMIO, 0x71, \ +struct kvm_assigned_pci_dev) /* * ioctls for vcpu fds diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8091a4d..cb1d404 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -329,9 +329,12 @@ void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); #ifdef CONFIG_DMAR int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, unsigned long npages); -int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_iommu_map_guest(struct kvm *kvm); int kvm_iommu_unmap_guest(struct kvm *kvm); +int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); +int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev); #else /* CONFIG_DMAR */ static inline int kvm_iommu_map_pages(struct kvm *kvm, gfn_t base_gfn, @@ -340,9 +343,7 @@ static inline int kvm_iommu_map_pages(struct kvm *kvm, return 0; } -static inline int kvm_iommu_map_guest(struct kvm *kvm, - struct kvm_assigned_dev_kernel - 
*assigned_dev) +static inline int kvm_iommu_map_guest(struct kvm *kvm) { return -ENODEV; } @@ -351,6 +352,18 @@ static inline int kvm_iommu_unmap_guest(struct kvm *kvm) { return 0; } + +static inline int kvm_assign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} + +static inline int kvm_deassign_device(struct kvm *kvm, + struct kvm_assigned_dev_kernel *assigned_dev) +{ + return 0; +} #endif /* CONFIG_DMAR */ static inline void kvm_guest_enter(void) diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 8dab7ce..fe6aba0 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -472,7 +472,12 @@ static int kvm_vm_ioctl_assign_device(struct kvm *kvm, list_add(match-list, kvm-arch.assigned_dev_head); if (assigned_dev-flags KVM_DEV_ASSIGN_ENABLE_IOMMU) { - r = kvm_iommu_map_guest(kvm, match); + if (!kvm-arch.intel_iommu_domain) { + r = kvm_iommu_map_guest(kvm); + if (r) + goto out_list_del; + } + r = kvm_assign_device(kvm, match); if (r) goto out_list_del; } @@ -494,6 +499,35 @@ out_free: } #endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT +static int kvm_vm_ioctl_deassign_device(struct kvm *kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + int r = 0; + struct kvm_assigned_dev_kernel *match; + + mutex_lock(kvm-lock); + + match = kvm_find_assigned_dev(kvm-arch.assigned_dev_head, + assigned_dev-assigned_dev_id); + if (!match) { + printk(KERN_INFO %s: device hasn't been assigned before, + so cannot be deassigned\n, __func__); + r = -EINVAL; + goto out; + } + + if (assigned_dev-flags KVM_DEV_ASSIGN_ENABLE_IOMMU) + kvm_deassign_device(kvm, match); + + kvm_free_assigned_device(kvm, match); + +out: + mutex_unlock(kvm-lock); + return r; +} +#endif + static inline int valid_vcpu(int n) { return likely(n = 0 n KVM_MAX_VCPUS); @@ -1833,6 +1867,19 @@ static long kvm_vm_ioctl(struct file *filp, break; } #endif +#ifdef KVM_CAP_DEVICE_DEASSIGNMENT + case KVM_DEASSIGN_PCI_DEVICE: { + struct kvm_assigned_pci_dev assigned_dev; + + r = 
-EFAULT; + if
[PATCH 2/2] qemu: ppc: fix build warnings
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] --- qemu/hw/device_tree.c | 14 +++--- qemu/hw/device_tree.h | 12 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/qemu/hw/device_tree.c b/qemu/hw/device_tree.c index e73129d..2621ff1 100644 --- a/qemu/hw/device_tree.c +++ b/qemu/hw/device_tree.c @@ -31,7 +31,7 @@ /* This function reads device-tree property files that are of * a single cell size */ -uint32_t read_proc_dt_prop_cell(char *path_in_device_tree) +uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree) { char *buf = NULL; int i; @@ -65,7 +65,7 @@ uint32_t read_proc_dt_prop_cell(char *path_in_device_tree) #ifdef CONFIG_LIBFDT /* support functions */ -static int get_offset_of_node(void *fdt, char *node_path) +static int get_offset_of_node(void *fdt, const char *node_path) { int node_offset; node_offset = fdt_path_offset(fdt, node_path); @@ -78,7 +78,7 @@ static int get_offset_of_node(void *fdt, char *node_path) } /* public functions */ -void *load_device_tree(char *filename_path, unsigned long load_addr) +void *load_device_tree(const char *filename_path, unsigned long load_addr) { int dt_file_size; int dt_file_load_size; @@ -134,7 +134,7 @@ fail: return NULL; } -void dump_device_tree_to_file(void *fdt, char *filename) +void dump_device_tree_to_file(void *fdt, const char *filename) { int fd; fd = open(filename, O_RDWR|O_CREAT, O_RDWR); @@ -148,7 +148,7 @@ void dump_device_tree_to_file(void *fdt, char *filename) close(fd); } -void dt_cell(void *fdt, char *node_path, char *property, +void dt_cell(void *fdt, const char *node_path, const char *property, uint32_t val) { int offset; @@ -163,7 +163,7 @@ void dt_cell(void *fdt, char *node_path, char *property, } /* This function is to manipulate a cell with multiple values */ -void dt_cell_multi(void *fdt, char *node_path, char *property, +void dt_cell_multi(void *fdt, const char *node_path, const char *property, uint32_t *val_array, int size) { int offset; @@ -177,7 +177,7 @@ void 
dt_cell_multi(void *fdt, char *node_path, char *property, } } -void dt_string(void *fdt, char *node_path, char *property, +void dt_string(void *fdt, const char *node_path, const char *property, char *string) { int offset; diff --git a/qemu/hw/device_tree.h b/qemu/hw/device_tree.h index 05a81ef..a311309 100644 --- a/qemu/hw/device_tree.h +++ b/qemu/hw/device_tree.h @@ -11,16 +11,16 @@ */ /* device-tree proc support functions */ -uint32_t read_proc_dt_prop_cell(char *path_in_device_tree); +uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree); #ifdef CONFIG_LIBFDT /* device tree functions */ -void *load_device_tree(char *filename_path, target_ulong load_addr); -void dump_device_tree_to_file(void *fdt, char *filename); -void dt_cell(void *fdt, char *node_path, char *property, +void *load_device_tree(const char *filename_path, target_ulong load_addr); +void dump_device_tree_to_file(void *fdt, const char *filename); +void dt_cell(void *fdt, const char *node_path, const char *property, uint32_t val); -void dt_cell_multi(void *fdt, char *node_path, char *property, +void dt_cell_multi(void *fdt, const char *node_path, const char *property, uint32_t *val_array, int size); -void dt_string(void *fdt, char *node_path, char *property, +void dt_string(void *fdt, const char *node_path, const char *property, char *string); #endif -- 1.5.6.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
powerpc kvm-userspace build fixes
These patches fix the kvm-userspace qemu build after a recent merge with upstream qemu. I'm also seeing a build dependency issue with dyngen-opc.h that I don't see upstream. I haven't sorted that out yet, but running "make qemu/ppcemb-softmmu/dyngen-opc.h" first works around the problem. -Hollis -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] qemu: ppc: fix build after qemu upstream changes
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] --- qemu/hw/ppc440_bamboo.c | 37 + 1 files changed, 21 insertions(+), 16 deletions(-) diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c index bf42245..79e4ea8 100644 --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -38,13 +38,14 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, qemu_irq *pic; ppc4xx_pci_t *pci; CPUState *env; - uint64_t ep=0; - uint64_t la=0; - int is_linux=1; /* Will assume allways is Linux for now */ - target_long kernel_size=0; - target_ulong initrd_base=0; - target_long initrd_size=0; - target_ulong dt_base=0; +uint64_t elf_entry; +uint64_t elf_lowaddr; + target_ulong entry = 0; + target_ulong loadaddr = 0; + target_long kernel_size = 0; + target_ulong initrd_base = 0; + target_long initrd_size = 0; + target_ulong dt_base = 0; void *fdt; int ret; int ram_stick_sizes[] = {25620, 12820, 6420, @@ -105,20 +106,24 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, /* load kernel with uboot loader */ printf(%s: load kernel\n, __func__); - ret = load_uimage(kernel_filename, ep, la, kernel_size, is_linux); - if (ret 0) - ret = load_elf(kernel_filename, 0, ep, la, NULL); - - if (ret 0) { + kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL); + if (kernel_size 0) { + kernel_size = load_elf(kernel_filename, 0, elf_entry, elf_lowaddr, + NULL); +entry = elf_entry; +loadaddr = elf_lowaddr; +} + + if (kernel_size 0) { fprintf(stderr, qemu: could not load kernel '%s'\n, kernel_filename); exit(1); } - printf(kernel is at guest address: 0x%lx\n, (unsigned long)la); + printf(kernel is at guest address: 0x%lx\n, (unsigned long)loadaddr); /* load initrd */ if (initrd_filename) { - initrd_base = kernel_size + la; + initrd_base = kernel_size + loadaddr; printf(%s: load initrd\n, __func__); initrd_size = load_image(initrd_filename, phys_ram_base + initrd_base); @@ -156,7 +161,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, if (initrd_base) dt_base = 
initrd_base + initrd_size; else - dt_base = kernel_size + la; + dt_base = kernel_size + loadaddr; fdt = load_device_tree(buf, (unsigned long)(phys_ram_base + dt_base)); if (fdt == NULL) { @@ -188,7 +193,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, /* location of device tree in register */ env-gpr[3] = dt_base; #endif - env-nip = ep; + env-nip = entry; } if (pci) { -- 1.5.6.5 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/5] KVM: add KVM_USERSPACE_IRQ_SOURCE_ID assertions
Mark McLoughlin wrote: Make sure kvm_request_irq_source_id() never returns KVM_USERSPACE_IRQ_SOURCE_ID. Likewise, check that kvm_free_irq_source_id() never accepts KVM_USERSPACE_IRQ_SOURCE_ID. An alternative way to do this is to drop the distinction KVM_USERSPACE_IRQ_SOURCE_ID has, and simply allocate it via the normal irq source id allocation API (and store it in struct kvm). That's not worth the churn though. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Luis Henriques wrote: On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote: Luis Henriques wrote: No, I was not able to reproduce the issue. Please let me know if you need some more information on my system (.config, for instance). Were you using some other virtualization product? Were you running suspend/resume? No for both questions. However, I had compiled support for suspend (not sure if this is what you mean by running suspend/resume) - This is a feature I used only once or twice... The underlying problem is that an svm instruction has been executed, but svm is disabled. Since kvm enables svm unconditionally on all processors on startup, there are only a few paths that can potentially trigger this: - another virtualization module turned svm off - cpu hotadd/hotremove (suspend/resume triggers this) - something did a read-modify-write cycle on cr4 (which contains the svm enable bit) while kvm enabled that bit - core was turned off (does linux power management do that?) Anything ring a bell? -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] KVM: Qemu: push_nmi should be only used by I386 Arch.
Hollis Blanchard wrote: Well, it happens, but I do wish that more people would use cscope or even grep to find all users of a symbol. That's reasonable. I also wish that Avi would get his PPC box working so he could catch build breaks like these. Cross-compilers would do as well. I now have a build box somewhere. It's now cloning the source repositories. Once I start rejecting patches as won't build, I hope people will be more careful. Acked-by: Hollis Blanchard [EMAIL PROTECTED] Applied, thanks Jan. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] add ksm kernel shared memory driver.
* Alan Cox ([EMAIL PROTECTED]) wrote: + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE); + mutex_unlock(sha1_lock); + if (r) { + char *old_addr, *new_addr; + old_addr = kmap_atomic(oldpage, KM_USER0); + new_addr = kmap_atomic(newpage, KM_USER1); + r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN, + PAGE_SIZE-PAGEHASH_LEN); NAK - this isn't guaranteed to be robust so you could end up merging different pages one provided by a malicious attacker. I presume you're referring to the digest comparison. While there's theoretical concern of hash collision, it's mitigated by hmac(sha1) so the attacker can't brute force for known collisions. thanks, -chris -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] add ksm kernel shared memory driver.
On Tue, 2 Dec 2008 10:07:24 -0800 Chris Wright [EMAIL PROTECTED] wrote: * Alan Cox ([EMAIL PROTECTED]) wrote: + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE); + mutex_unlock(sha1_lock); + if (r) { + char *old_addr, *new_addr; + old_addr = kmap_atomic(oldpage, KM_USER0); + new_addr = kmap_atomic(newpage, KM_USER1); + r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN, + PAGE_SIZE-PAGEHASH_LEN); NAK - this isn't guaranteed to be robust so you could end up merging different pages one provided by a malicious attacker. I presume you're referring to the digest comparison. While there's theoretical concern of hash collision, it's mitigated by hmac(sha1) so the attacker can't brute force for known collisions. Using current known techniques. A random collision is just as bad news. This code simply isn't fit for the kernel. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[BUG] virtio-pci queue allocation not page-aligned
I just spent a number of hours tracking this one down, and I'm not too thrilled about it. vp_find_vq() does the memory allocation for virtio PCI rings, and it uses kzalloc() to do it. This is bad because the ring memory *must* be page-aligned. According to Anthony, at the time this code was written, various slab allocators were checked and all happened to return page-aligned buffers. So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while investigating an unrelated problem, which offset the address by 64 bytes. One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's better than nothing, but still stinks. Another is to use Kconfig to express that slab debugging breaks virtio. Also pretty lame IMHO, will look pretty funny in the Kconfig file, and that only solves today's problem. Another slab allocator or a change in behavior of an existing allocator could mean that ordinary allocations also become non-page-aligned. Finally, we could use the interface intended for exactly this purpose: the page allocator. If there's some problem with high memory, don't allocate it with GFP_HIGHMEM. -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
On Tue, Dec 02, 2008 at 02:23:52PM +0200, Avi Kivity wrote: Luis Henriques wrote: On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote: Luis Henriques wrote: No, I was not able to reproduce the issue. Please let me know if you need some more information on my system (.config, for instance). Were you using some other virtualization product? Were you running suspend/resume? No for both questions. However, I had compiled support for suspend (not sure if this is what you mean by running suspend/resume) - This is a feature I used only once or twice... The underlying problem is that an svm instruction has been executed, but svm is disabled. Since kvm enables svm unconditionally on all processors on startup, there are only a few paths that can potentially trigger this: - another virtualization module turned svm off - cpu hotadd/hotremove (suspend/resume triggers this) - something did a read-modify-write cycle on cr4 (which contains the svm enable bit) while kvm enabled that bit - core was turned off (does linux power management do that?) Anything ring a bell? Ok, I am not sure but there is a possibility of having the vboxdrv driver loaded. _But_ I was not using, i.e., I do not use VirtualBox. In my attempts to reproduce the issue, I tried to load this module but, unfortunately, my distro has this package broken ATM (err... in fact, the problem is not the distro but me - I am using an unstable version). vboxdrv could be a problem if I was using it, but I believe it shouldn't cause this if it is not being used... but it's just a guess. -- Luis Henriques -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] virtio-pci queue allocation not page-aligned
Hollis Blanchard wrote: Finally, we could use the interface intended for exactly this purpose: the page allocator. If there's some problem with high memory, don't allocate it with GFP_HIGHMEM. Can you work up a patch to do this? Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Luis Henriques wrote: Ok, I am not sure but there is a possibility of having the vboxdrv driver loaded. _But_ I was not using, i.e., I do not use VirtualBox. In my attempts to reproduce the issue, I tried to load this module but, unfortunatly, my distro has this package broken ATM (err... in fact, the problem is not the distro but me - I am using an unstable version). vboxdrv could be a problem if I was using it, but I believe it shouldn't cause this if it is not being used... but it's just a guess. Let's keep an eye open on it. If it reproduces, be sure to note what drivers are loaded. Meanwhile, I don't recommend having different virtualization modules loaded concurrently. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Luis Henriques wrote: Ok, I am not sure but there is a possibility of having the vboxdrv driver loaded. _But_ I was not using, i.e., I do not use VirtualBox. In my attempts to reproduce the issue, I tried to load this module but, unfortunatly, my distro has this package broken ATM (err... in fact, the problem is not the distro but me - I am using an unstable version). vboxdrv could be a problem if I was using it, but I believe it shouldn't cause this if it is not being used... but it's just a guess. Let's keep an eye open on it. If it reproduces, be sure to note what drivers are loaded. Meanwhile, I don't recommend having different virtualization modules loaded concurrently. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
(I am CC'ing to Steven Rostedt since he might be interested on this) On Tue, Dec 02, 2008 at 07:09:14PM +, Luis Henriques wrote: On Tue, Dec 02, 2008 at 02:23:52PM +0200, Avi Kivity wrote: Luis Henriques wrote: On Sun, Nov 30, 2008 at 10:44:55PM +0200, Avi Kivity wrote: Luis Henriques wrote: No, I was not able to reproduce the issue. Please let me know if you need some more information on my system (.config, for instance). Were you using some other virtualization product? Were you running suspend/resume? No for both questions. However, I had compiled support for suspend (not sure if this is what you mean by running suspend/resume) - This is a feature I used only once or twice... The underlying problem is that an svm instruction has been executed, but svm is disabled. Since kvm enables svm unconditionally on all processors on startup, there are only a few paths that can potentially trigger this: - another virtualization module turned svm off - cpu hotadd/hotremove (suspend/resume triggers this) - something did a read-modify-write cycle on cr4 (which contains the svm enable bit) while kvm enabled that bit - core was turned off (does linux power management do that?) Anything ring a bell? Ok, I am not sure but there is a possibility of having the vboxdrv driver loaded. _But_ I was not using, i.e., I do not use VirtualBox. In my attempts to reproduce the issue, I tried to load this module but, unfortunatly, my distro has this package broken ATM (err... in fact, the problem is not the distro but me - I am using an unstable version). vboxdrv could be a problem if I was using it, but I believe it shouldn't cause this if it is not being used... but it's just a guess. I have some other information to had to my previous email. However, I do not know whether it is related with my first bug report. It looks like ftrace may stop the CPUs in some situations and I have been playing with ftrace for some time. 
So, here's what I just did: started ftrace with the function tracer and then started kvm. I got ugly crashes, and they are apparently quite easy to reproduce (I get a complete freeze or an immediate reboot). I did not investigate this issue and, again, it may not be related to my initial report but there's definitely something wrong here, right? (just to refresh, I am using 2.6.28-rc6-7-ged31348 on an x86_64 machine) -- Luis Henriques -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
[ added Ingo too ] On Tue, 2008-12-02 at 19:46 +, Luis Henriques wrote: (I am CC'ing to Steven Rostedt since he might be interested on this) On Tue, Dec 02, 2008 at 07:09:14PM +, Luis Henriques wrote: I have some other information to had to my previous email. However, I do not know whether it is related with my first bug report. It looks like ftrace may stop the CPUs in some situations and I have been playing with ftrace for some time. So, here's what I just did: started ftrace with function tracer and then started kvm. I got ugly crashes and apparently quite easy to reproduce (I get complete freeze or immediate reboot). I did not investigated this issue and, again, it may not be related with my initial report but there's definitely something wrong here, right? (just to refresh, I am using 2.6.28-rc6-7-ged31348 in x86_64 machine) Hi, ftrace only stops the CPUs on start up or shutdown of the function tracer (i.e. echo function > /debugfs/tracing/current_tracer). It does not stop the CPUs at any other time. Now what ftrace does do, is to call a tracing function at pretty much every function call in the kernel. In most places this is fine, but there are some cases that this can be an issue. For example, we cannot trace suspend and resume because on resume smp_processor_id() is undefined, which ftrace uses. The following must be available without recursion for the function tracer to work: local_irq_save/restore smp_processor_id preempt_enable/disable_notrace atomic_inc/dec There could be other things that might be causing the crash. Do you have a crash dump and config available? Thanks, -- Steve -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Steven Rostedt wrote: The following must be available without recursion for the function tracer to work: local_irq_save/restore smp_processor_id preempt_enable/disable_notrace atomic_inc/dec In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the vmrun instruction and the call to load_host_msrs() is executed without a live pda, so no smp_processor_id(). Could easily be fixed by rearranging things. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
On Tue, 2008-12-02 at 22:38 +0200, Avi Kivity wrote: Steven Rostedt wrote: The following must be available without recursion for the function tracer to work: local_irq_save/restore smp_processor_id preempt_enable/disable_notrace atomic_inc/dec In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the vmrun instruction and the call to load_host_msrs() is executed without a live pda, so no smp_processor_id(). Could easily be fixed by rearranging things. That would be best, but if you have trouble, you could surround the trouble area with a: tracing_stop(); tracing_start(); That will prevent tracing within those locations. The function tracer will still be called, but it will exit the function without doing anything else. Note: that stops tracing on all CPUS, not just the CPU that called it. -- Steve -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
Steven Rostedt wrote: In arch/x86/kvm/svm.c, function svm_vcpu_run(), everything between the vmrun instruction and the call to load_host_msrs() is executed without a live pda, so no smp_processor_id(). Could easily be fixed by rearranging things. That would be best, but if you have trouble, you could surround the trouble area with a: tracing_stop(); tracing_start(); That will prevent tracing within those locations. The function tracer will still be called, but it will exit the function without doing anything else. Note: that stops tracing on all CPUS, not just the CPU that called it. The vmrun instruction can execute for a long time (hours, if you have a dyntick kernel and no timers scheduled), so that's less than optimal. -- I have a truly marvellous patch that fixes the bug which this signature is too narrow to contain. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Hangs
On Tue, Dec 02, 2008 at 02:09:39PM +0200, Avi Kivity wrote: xming wrote: The same guest did it again. # uname -a Linux spaceball 2.6.27.6 #1 SMP Fri Nov 14 11:51:10 CET 2008 i686 QEMU Virtual CPU version 0.9.1 AuthenticAMD GNU/Linux # date Thu Dec 19 01:54:27 WET 1912 # uptime 01:54:29 up 14666 days, 21:17, 12 users, load average: 3.99, 3.97, 3.91 What can I do to provide more info? A way to reproduce would be best. If you have access to multiple hosts, try to isolate whether it happens only on amd or only on intel. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html I have a way to reproduce my instance of the problem easily now. I was trying to build a new kernel on my guest, and found that depmod hangs guests every time. In my case, I only have an amd processor - I don't have an intel host to try it on, right now, but it happens on Ubuntu 8.04 and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships with ubuntu 8.10. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] kvm crashes in 2.6.28-rc6-00007-ged31348
On Tue, 2 Dec 2008, Luis Henriques wrote: Unfortunately, I have only my laptop (where the crash is occurring) and no serial port on it (I am not able to get any output from the console). Do you have any suggestion on how to collect information on the crash? I can try to configure Kdump to capture more info. It is probably caused by what Avi mentioned. I guess the best you can do is wait for a patch from Avi and try that out. Thanks, -- Steve -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] add ksm kernel shared memory driver.
* Alan Cox ([EMAIL PROTECTED]) wrote: On Tue, 2 Dec 2008 10:07:24 -0800 Chris Wright [EMAIL PROTECTED] wrote: * Alan Cox ([EMAIL PROTECTED]) wrote: + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE); + mutex_unlock(sha1_lock); + if (r) { + char *old_addr, *new_addr; + old_addr = kmap_atomic(oldpage, KM_USER0); + new_addr = kmap_atomic(newpage, KM_USER1); + r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN, + PAGE_SIZE-PAGEHASH_LEN); NAK - this isn't guaranteed to be robust so you could end up merging different pages one provided by a malicious attacker. I presume you're referring to the digest comparison. While there's theoretical concern of hash collision, it's mitigated by hmac(sha1) so the attacker can't brute force for known collisions. Using current known techniques. A random collision is just as bad news. And, just to clarify, your concern would extend to any digest based comparison? Or are you specifically concerned about sha1? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] add ksm kernel shared memory driver.
On Tue, 2 Dec 2008 13:24:11 -0800 Chris Wright [EMAIL PROTECTED] wrote: Using current known techniques. A random collision is just as bad news. And, just to clarify, your concern would extend to any digest based comparison? Or are you specifically concerned about sha1? Wouldn't this issue just go away if the code simply compared the full pages, rather than skipping the hashed 128 bytes at the beginning? Given the cost of this whole operation (which, it seems, can involve copying one of the pages before testing for equality), skipping the comparison of 128 bytes seems like a bit of a premature optimization. jon -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3 of 6] kvm: ppc: directly insert shadow mappings into the hardware TLB
Formerly, we used to maintain a per-vcpu shadow TLB and on every entry to the guest would load this array into the hardware TLB. This consumed 1280 bytes of memory (64 entries of 16 bytes plus a struct page pointer each), and also required some assembly to loop over the array on every entry. Instead of saving a copy in memory, we can just store shadow mappings directly into the hardware TLB, accepting that the host kernel will clobber these as part of the normal 440 TLB round robin. When we do that we need less than half the memory, and we have decreased the exit handling time for all guest exits, at the cost of increased number of TLB misses because the host overwrites some guest entries. These savings will be increased on processors with larger TLBs or which implement intelligent flush instructions like tlbivax (which will avoid the need to walk arrays in software). In addition to that and to the code simplification, we have a greater chance of leaving other host userspace mappings in the TLB, instead of forcing all subsequent tasks to re-fault all their mappings. Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -22,19 +22,25 @@ #include linux/kvm_host.h -/* XXX Can't include mmu-44x.h because it redefines struct mm_context. */ #define PPC44x_TLB_SIZE 64 + +/* If the guest is expecting it, this can be as large as we like; we'd just + * need to find some way of advertising it. */ +#define KVM44x_GUEST_TLB_SIZE 64 + +struct kvmppc_44x_shadow_ref { + struct page *page; + u16 gtlb_index; + u8 writeable; + u8 tid; +}; struct kvmppc_vcpu_44x { /* Unmodified copy of the guest's TLB. */ - struct kvmppc_44x_tlbe guest_tlb[PPC44x_TLB_SIZE]; - /* TLB that's actually used when the guest is running. */ - struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; - /* Pages which are referenced in the shadow TLB. 
*/ - struct page *shadow_pages[PPC44x_TLB_SIZE]; + struct kvmppc_44x_tlbe guest_tlb[KVM44x_GUEST_TLB_SIZE]; - /* Track which TLB entries we've modified in the current exit. */ - u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + /* References to guest pages in the hardware TLB. */ + struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; struct kvm_vcpu vcpu; }; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -53,7 +53,8 @@ extern void kvmppc_emulate_dec(struct kv extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, - u64 asid, u32 flags, u32 max_bytes); + u64 asid, u32 flags, u32 max_bytes, + unsigned int gtlb_idx); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -357,12 +357,6 @@ int main(void) #ifdef CONFIG_KVM DEFINE(TLBE_BYTES, sizeof(struct kvmppc_44x_tlbe)); - DEFINE(VCPU_TO_44X, offsetof(struct kvmppc_vcpu_44x, vcpu)); - DEFINE(VCPU44x_SHADOW_TLB, - offsetof(struct kvmppc_vcpu_44x, shadow_tlb)); - DEFINE(VCPU44x_SHADOW_MOD, - offsetof(struct kvmppc_vcpu_44x, shadow_tlb_mod)); - DEFINE(VCPU_HOST_STACK, offsetof(struct kvm_vcpu, arch.host_stack)); DEFINE(VCPU_HOST_PID, offsetof(struct kvm_vcpu, arch.host_pid)); DEFINE(VCPU_GPRS, offsetof(struct kvm_vcpu, arch.gpr)); diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -96,21 +96,14 @@ void kvmppc_core_load_guest_debugstate(s void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - int i; - - /* Mark every guest entry in the shadow TLB entry modified, so that they -* will all be reloaded on the 
next vcpu run (instead of being -* demand-faulted). */ - for (i = 0; i = tlb_44x_hwater; i++) - kvmppc_tlbe_set_modified(vcpu, i); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - /* Don't leave guest TLB entries resident when being de-scheduled. */ - /* XXX It would be nice to differentiate between heavyweight exit and -* sched_out here, since we could avoid the TLB flush for heavyweight -* exits. */ + /* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB +* entries around when we're descheduled, so we must completely flush the +* TLB of all guest mappings. On the other
[PATCH 4 of 6] kvm: ppc: save and restore guest mappings on context switch
Store shadow TLB entries in memory, but only use it on host context switch (instead of every guest entry). This improves performance for most workloads on 440 by reducing the guest TLB miss rate. Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/kvm_44x.h b/arch/powerpc/include/asm/kvm_44x.h --- a/arch/powerpc/include/asm/kvm_44x.h +++ b/arch/powerpc/include/asm/kvm_44x.h @@ -42,6 +42,10 @@ struct kvmppc_vcpu_44x { /* References to guest pages in the hardware TLB. */ struct kvmppc_44x_shadow_ref shadow_refs[PPC44x_TLB_SIZE]; + /* State of the shadow TLB at guest context switch time. */ + struct kvmppc_44x_tlbe shadow_tlb[PPC44x_TLB_SIZE]; + u8 shadow_tlb_mod[PPC44x_TLB_SIZE]; + struct kvm_vcpu vcpu; }; @@ -51,5 +55,7 @@ static inline struct kvmppc_vcpu_44x *to } void kvmppc_set_pid(struct kvm_vcpu *vcpu, u32 new_pid); +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu); +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu); #endif /* __ASM_44X_H__ */ diff --git a/arch/powerpc/kvm/44x.c b/arch/powerpc/kvm/44x.c --- a/arch/powerpc/kvm/44x.c +++ b/arch/powerpc/kvm/44x.c @@ -96,15 +96,12 @@ void kvmppc_core_load_guest_debugstate(s void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { + kvmppc_44x_tlb_load(vcpu); } void kvmppc_core_vcpu_put(struct kvm_vcpu *vcpu) { - /* XXX Since every guest uses TS=1 TID=0/1 mappings, we can't leave any TLB -* entries around when we're descheduled, so we must completely flush the -* TLB of all guest mappings. On the other hand, if there is only one -* guest, this flush is completely unnecessary. 
*/ - _tlbia(); + kvmppc_44x_tlb_put(vcpu); } int kvmppc_core_check_processor_compat(void) diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -73,6 +73,25 @@ static inline void kvmppc_44x_tlbie(unsi ); } +static inline void kvmppc_44x_tlbre(unsigned int index, +struct kvmppc_44x_tlbe *tlbe) +{ + asm volatile( + tlbre %[word0], %[index], 0\n + mfspr %[tid], %[sprn_mmucr]\n + andi. %[tid], %[tid], 0xff\n + tlbre %[word1], %[index], 1\n + tlbre %[word2], %[index], 2\n + : [word0] =r(tlbe-word0), + [word1] =r(tlbe-word1), + [word2] =r(tlbe-word2), + [tid] =r(tlbe-tid) + : [index] r(index), + [sprn_mmucr] i(SPRN_MMUCR) + : cc + ); +} + static inline void kvmppc_44x_tlbwe(unsigned int index, struct kvmppc_44x_tlbe *stlbe) { @@ -115,6 +134,44 @@ static u32 kvmppc_44x_tlb_shadow_attrib( return attrib; } + +/* Load shadow TLB back into hardware. */ +void kvmppc_44x_tlb_load(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i = tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = vcpu_44x-shadow_tlb[i]; + + if (get_tlb_v(stlbe) get_tlb_ts(stlbe)) + kvmppc_44x_tlbwe(i, stlbe); + } +} + +static void kvmppc_44x_tlbe_set_modified(struct kvmppc_vcpu_44x *vcpu_44x, + unsigned int i) +{ + vcpu_44x-shadow_tlb_mod[i] = 1; +} + +/* Save hardware TLB to the vcpu, and invalidate all guest mappings. */ +void kvmppc_44x_tlb_put(struct kvm_vcpu *vcpu) +{ + struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); + int i; + + for (i = 0; i = tlb_44x_hwater; i++) { + struct kvmppc_44x_tlbe *stlbe = vcpu_44x-shadow_tlb[i]; + + if (vcpu_44x-shadow_tlb_mod[i]) + kvmppc_44x_tlbre(i, stlbe); + + if (get_tlb_v(stlbe) get_tlb_ts(stlbe)) + kvmppc_44x_tlbie(i); + } +} + /* Search the guest TLB for a matching entry. 
*/ int kvmppc_44x_tlb_index(struct kvm_vcpu *vcpu, gva_t eaddr, unsigned int pid, @@ -283,6 +340,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp ref-tid = stlbe.tid; /* Insert shadow mapping into hardware TLB. */ + kvmppc_44x_tlbe_set_modified(vcpu_44x, victim); kvmppc_44x_tlbwe(victim, stlbe); KVMTRACE_5D(STLB_WRITE, vcpu, victim, stlbe.tid, stlbe.word0, stlbe.word1, stlbe.word2, handler); -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2 of 6] powerpc/44x: declare tlb_44x_index for use in C code
KVM currently ignores the host's round robin TLB eviction selection, instead maintaining its own TLB state and its own round robin index. However, by participating in the normal 44x TLB selection, we can drop the alternate TLB processing in KVM. This results in a significant performance improvement, since that processing currently must be done on *every* guest exit. Accordingly, KVM needs to be able to access and increment tlb_44x_index. (KVM on 440 cannot be a module, so there is no need to export this symbol.) Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] Acked-by: Josh Boyer [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h --- a/arch/powerpc/include/asm/mmu-44x.h +++ b/arch/powerpc/include/asm/mmu-44x.h @@ -56,6 +56,7 @@ #ifndef __ASSEMBLY__ extern unsigned int tlb_44x_hwater; +extern unsigned int tlb_44x_index; typedef struct { unsigned long id; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1 of 6] kvm: ppc: support large host pages
KVM on 440 has always been able to handle large guest mappings with 4K host pages -- we must, since the guest kernel uses 256MB mappings. This patch makes KVM work when the host has large pages too (tested with 64K). Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -52,8 +52,8 @@ extern int kvmppc_emulate_mmio(struct kv extern int kvmppc_emulate_mmio(struct kvm_run *run, struct kvm_vcpu *vcpu); extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu); -extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, - u64 asid, u32 flags); +extern void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, + u64 asid, u32 flags, u32 max_bytes); extern void kvmppc_mmu_priv_switch(struct kvm_vcpu *vcpu, int usermode); extern void kvmppc_mmu_switch_pid(struct kvm_vcpu *vcpu, u32 pid); diff --git a/arch/powerpc/kvm/44x_tlb.c b/arch/powerpc/kvm/44x_tlb.c --- a/arch/powerpc/kvm/44x_tlb.c +++ b/arch/powerpc/kvm/44x_tlb.c @@ -27,6 +27,13 @@ #include asm/kvm_44x.h #include 44x_tlb.h + +#ifndef PPC44x_TLBE_SIZE +#define PPC44x_TLBE_SIZE PPC44x_TLB_4K +#endif + +#define PAGE_SIZE_4K (112) +#define PAGE_MASK_4K (~(PAGE_SIZE_4K - 1)) #define PPC44x_TLB_UATTR_MASK \ (PPC44x_TLB_U0|PPC44x_TLB_U1|PPC44x_TLB_U2|PPC44x_TLB_U3) @@ -179,15 +186,26 @@ void kvmppc_tlbe_set_modified(struct kvm vcpu_44x-shadow_tlb_mod[i] = 1; } -/* Caller must ensure that the specified guest TLB entry is safe to insert into - * the shadow TLB. */ -void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gfn_t gfn, u64 asid, -u32 flags) +/** + * kvmppc_mmu_map -- create a host mapping for guest memory + * + * If the guest wanted a larger page than the host supports, only the first + * host page is mapped here and the rest are demand faulted. 
+ * + * If the guest wanted a smaller page than the host page size, we map only the + * guest-size page (i.e. not a full host page mapping). + * + * Caller must ensure that the specified guest TLB entry is safe to insert into + * the shadow TLB. + */ +void kvmppc_mmu_map(struct kvm_vcpu *vcpu, u64 gvaddr, gpa_t gpaddr, u64 asid, +u32 flags, u32 max_bytes) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); struct page *new_page; struct kvmppc_44x_tlbe *stlbe; hpa_t hpaddr; + gfn_t gfn; unsigned int victim; /* Future optimization: don't overwrite the TLB entry containing the @@ -198,6 +216,7 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp stlbe = vcpu_44x-shadow_tlb[victim]; /* Get reference to new page. */ + gfn = gpaddr PAGE_SHIFT; new_page = gfn_to_page(vcpu-kvm, gfn); if (is_error_page(new_page)) { printk(KERN_ERR Couldn't get guest page for gfn %lx!\n, gfn); @@ -220,10 +239,25 @@ void kvmppc_mmu_map(struct kvm_vcpu *vcp stlbe-tid = !(asid 0xff); /* Force TS=1 for all guest mappings. */ - /* For now we hardcode 4KB mappings, but it will be important to -* use host large pages in the future. */ - stlbe-word0 = (gvaddr PAGE_MASK) | PPC44x_TLB_VALID | PPC44x_TLB_TS - | PPC44x_TLB_4K; + stlbe-word0 = PPC44x_TLB_VALID | PPC44x_TLB_TS; + + if (max_bytes = PAGE_SIZE) { + /* Guest mapping is larger than or equal to host page size. We can use +* a native host mapping. */ + stlbe-word0 |= (gvaddr PAGE_MASK) | PPC44x_TLBE_SIZE; + } else { + /* Guest mapping is smaller than host page size. We must restrict the +* size of the mapping to be at most the smaller of the two, but for +* simplicity we fall back to a 4K mapping (this is probably what the +* guest is using anyways). */ + stlbe-word0 |= (gvaddr PAGE_MASK_4K) | PPC44x_TLB_4K; + + /* 'hpaddr' is a host page, which is larger than the mapping we're +* inserting here. To compensate, we must add the in-page offset to the +* sub-page. 
*/ + hpaddr |= gpaddr (PAGE_MASK ^ PAGE_MASK_4K); + } + stlbe-word1 = (hpaddr 0xfc00) | ((hpaddr 32) 0xf); stlbe-word2 = kvmppc_44x_tlb_shadow_attrib(flags, vcpu-arch.msr MSR_PR); @@ -322,10 +356,8 @@ int kvmppc_44x_emul_tlbwe(struct kvm_vcp int kvmppc_44x_emul_tlbwe(struct kvm_vcpu *vcpu, u8 ra, u8 rs, u8 ws) { struct kvmppc_vcpu_44x *vcpu_44x = to_44x(vcpu); - u64 eaddr; - u64 raddr; + gva_t eaddr; u64 asid; - u32 flags; struct kvmppc_44x_tlbe *tlbe; unsigned int index; @@ -364,15 +396,22 @@ int
[PATCH 6 of 6] kvm: ppc: mostly cosmetic updates to the exit timing accounting code
The only significant changes were to kvmppc_exit_timing_write() and kvmppc_exit_timing_show(), both of which were dramatically simplified. Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -102,9 +102,8 @@ enum kvm_exit_types { __NUMBER_OF_KVM_EXIT_TYPES }; -#ifdef CONFIG_KVM_EXIT_TIMING /* allow access to big endian 32bit upper/lower parts and 64bit var */ -struct exit_timing { +struct kvmppc_exit_timing { union { u64 tv64; struct { @@ -112,7 +111,6 @@ struct exit_timing { } tv32; }; }; -#endif struct kvm_arch { }; @@ -174,8 +172,8 @@ struct kvm_vcpu_arch { u32 dbcr1; #ifdef CONFIG_KVM_EXIT_TIMING - struct exit_timing timing_exit; - struct exit_timing timing_last_enter; + struct kvmppc_exit_timing timing_exit; + struct kvmppc_exit_timing timing_last_enter; u32 last_exit_type; u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -132,7 +132,7 @@ int kvmppc_core_emulate_op(struct kvm_ru run-dcr.is_write = 0; vcpu-arch.io_gpr = rt; vcpu-arch.dcr_needed = 1; - account_exit(vcpu, DCR_EXITS); + kvmppc_account_exit(vcpu, DCR_EXITS); emulated = EMULATE_DO_DCR; } @@ -152,7 +152,7 @@ int kvmppc_core_emulate_op(struct kvm_ru run-dcr.data = vcpu-arch.gpr[rs]; run-dcr.is_write = 1; vcpu-arch.dcr_needed = 1; - account_exit(vcpu, DCR_EXITS); + kvmppc_account_exit(vcpu, DCR_EXITS); emulated = EMULATE_DO_DCR; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -202,7 +202,7 @@ int kvmppc_handle_exit(struct kvm_run *r break; case BOOKE_INTERRUPT_EXTERNAL: - account_exit(vcpu, EXT_INTR_EXITS); + kvmppc_account_exit(vcpu, 
EXT_INTR_EXITS); if (need_resched()) cond_resched(); r = RESUME_GUEST; @@ -212,7 +212,7 @@ int kvmppc_handle_exit(struct kvm_run *r /* Since we switched IVPR back to the host's value, the host * handled this interrupt the moment we enabled interrupts. * Now we just offer it a chance to reschedule the guest. */ - account_exit(vcpu, DEC_EXITS); + kvmppc_account_exit(vcpu, DEC_EXITS); if (need_resched()) cond_resched(); r = RESUME_GUEST; @@ -225,7 +225,7 @@ int kvmppc_handle_exit(struct kvm_run *r vcpu-arch.esr = vcpu-arch.fault_esr; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_PROGRAM); r = RESUME_GUEST; - account_exit(vcpu, USR_PR_INST); + kvmppc_account_exit(vcpu, USR_PR_INST); break; } @@ -233,7 +233,7 @@ int kvmppc_handle_exit(struct kvm_run *r switch (er) { case EMULATE_DONE: /* don't overwrite subtypes, just account kvm_stats */ - account_exit_stat(vcpu, EMULATED_INST_EXITS); + kvmppc_account_exit_stat(vcpu, EMULATED_INST_EXITS); /* Future optimization: only reload non-volatiles if * they were actually modified by emulation. */ r = RESUME_GUEST_NV; @@ -259,7 +259,7 @@ int kvmppc_handle_exit(struct kvm_run *r case BOOKE_INTERRUPT_FP_UNAVAIL: kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_FP_UNAVAIL); - account_exit(vcpu, FP_UNAVAIL); + kvmppc_account_exit(vcpu, FP_UNAVAIL); r = RESUME_GUEST; break; @@ -267,20 +267,20 @@ int kvmppc_handle_exit(struct kvm_run *r vcpu-arch.dear = vcpu-arch.fault_dear; vcpu-arch.esr = vcpu-arch.fault_esr; kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_DATA_STORAGE); - account_exit(vcpu, DSI_EXITS); + kvmppc_account_exit(vcpu, DSI_EXITS); r = RESUME_GUEST; break; case BOOKE_INTERRUPT_INST_STORAGE:
[PATCH 0 of 6] PowerPC KVM patches for 2.6.29
Hi Avi, here's the latest batch of PowerPC kernel patches. The first set dramatically improves performance. Most importantly, we add support for large host pages with KVM (i.e. PAGE_SHIFT > 12). (Large *guest* pages have already been supported since day 1, since the guest kernel uses them for the linear map.) Followup patches further improve performance by changing how we manage the shadow TLB. The last two add some accounting code to easily discover performance bottlenecks. This is especially important since the 440 core lacks performance monitoring hardware. These patches, in conjunction with 64KB pages on guest and host, get us to 96% of native performance for compute-bound workloads, which I'm pretty happy with. See http://kvm.qumranet.com/kvmwiki/PowerPC_Exittimings for more details (those statistics were gathered using the accounting patches). These have been tested pretty thoroughly for several weeks. Please apply for 2.6.29. Thanks! -Hollis -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5 of 6] kvm: ppc: Implement in-kernel exit timing statistics
Existing KVM statistics are either just counters (kvm_stat) reported for KVM generally or trace-based approaches like kvm_trace. For KVM on powerpc we had the need to track the timings of the different exit types. While this could be achieved by parsing data created with a kvm_trace extension, this adds too much overhead (at least on embedded PowerPC), slowing down the workloads we wanted to measure. Therefore this patch adds an in-kernel exit timing statistic to the powerpc kvm code. This statistic is available per VM and vcpu under the kvm debugfs directory. As this statistic adds low, but still some, overhead, it can be enabled via a .config entry and should be off by default. Since this patch touched all powerpc kvm_stat code anyway this code is now merged and simplified together with the exit timing statistic code (still working with exit timing disabled in .config). Signed-off-by: Christian Ehrhardt [EMAIL PROTECTED] Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -71,6 +71,49 @@ struct kvmppc_44x_tlbe { u32 word2; }; +enum kvm_exit_types { + MMIO_EXITS, + DCR_EXITS, + SIGNAL_EXITS, + ITLB_REAL_MISS_EXITS, + ITLB_VIRT_MISS_EXITS, + DTLB_REAL_MISS_EXITS, + DTLB_VIRT_MISS_EXITS, + SYSCALL_EXITS, + ISI_EXITS, + DSI_EXITS, + EMULATED_INST_EXITS, + EMULATED_MTMSRWE_EXITS, + EMULATED_WRTEE_EXITS, + EMULATED_MTSPR_EXITS, + EMULATED_MFSPR_EXITS, + EMULATED_MTMSR_EXITS, + EMULATED_MFMSR_EXITS, + EMULATED_TLBSX_EXITS, + EMULATED_TLBWE_EXITS, + EMULATED_RFI_EXITS, + DEC_EXITS, + EXT_INTR_EXITS, + HALT_WAKEUP, + USR_PR_INST, + FP_UNAVAIL, + DEBUG_EXITS, + TIMEINGUEST, + __NUMBER_OF_KVM_EXIT_TYPES +}; + +#ifdef CONFIG_KVM_EXIT_TIMING +/* allow access to big endian 32bit upper/lower parts and 64bit var */ +struct exit_timing { + union { + u64 tv64; + struct { + u32 tbu, tbl; + } tv32; + }; +}; +#endif + struct 
kvm_arch { }; @@ -130,6 +173,19 @@ struct kvm_vcpu_arch { u32 dbcr0; u32 dbcr1; +#ifdef CONFIG_KVM_EXIT_TIMING + struct exit_timing timing_exit; + struct exit_timing timing_last_enter; + u32 last_exit_type; + u32 timing_count_type[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_sum_quad_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_min_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_max_duration[__NUMBER_OF_KVM_EXIT_TYPES]; + u64 timing_last_exit; + struct dentry *debugfs_exit_timing; +#endif + u32 last_inst; ulong fault_dear; ulong fault_esr; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -381,5 +381,16 @@ int main(void) DEFINE(PTE_SHIFT, PTE_SHIFT); #endif +#ifdef CONFIG_KVM_EXIT_TIMING + DEFINE(VCPU_TIMING_EXIT_TBU, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbu)); + DEFINE(VCPU_TIMING_EXIT_TBL, offsetof(struct kvm_vcpu, + arch.timing_exit.tv32.tbl)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBU, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbu)); + DEFINE(VCPU_TIMING_LAST_ENTER_TBL, offsetof(struct kvm_vcpu, + arch.timing_last_enter.tv32.tbl)); +#endif + return 0; } diff --git a/arch/powerpc/kvm/44x_emulate.c b/arch/powerpc/kvm/44x_emulate.c --- a/arch/powerpc/kvm/44x_emulate.c +++ b/arch/powerpc/kvm/44x_emulate.c @@ -22,6 +22,7 @@ #include asm/dcr-regs.h #include asm/disassemble.h #include asm/kvm_44x.h +#include timing.h #include booke.h #include 44x_tlb.h @@ -58,11 +59,11 @@ int kvmppc_core_emulate_op(struct kvm_ru int ws; switch (get_op(inst)) { - case OP_RFI: switch (get_xop(inst)) { case XOP_RFI: kvmppc_emul_rfi(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFI_EXITS); *advance = 0; break; @@ -78,10 +79,12 @@ int kvmppc_core_emulate_op(struct kvm_ru case XOP_MFMSR: rt = get_rt(inst); vcpu-arch.gpr[rt] = vcpu-arch.msr; + kvmppc_set_exit_type(vcpu, EMULATED_MFMSR_EXITS); 
break; case XOP_MTMSR: rs = get_rs(inst); + kvmppc_set_exit_type(vcpu, EMULATED_MTMSR_EXITS);
Re: [BUG] virtio-pci queue allocation not page-aligned
On Wednesday 03 December 2008 05:38:21 Hollis Blanchard wrote: I just spent a number of hours tracking this one down, and I'm not too thrilled about it. vp_find_vq() does the memory allocation for virtio PCI rings, and it uses kzalloc() to do it. This is bad because the ring memory *must* be page-aligned. According to Anthony, at the time this code was written, various slab allocators were checked and all happened to return page-aligned buffers. So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while investigating an unrelated problem, which offset the address by 64 bytes. One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's better than nothing, but still stinks. It's a bug, we fix it. I've complained before, but since there was no evidence of it actually breaking, I didn't push. Prepare a patch, I'll try to get it in this release. Thanks, Rusty. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/4] add ksm kernel shared memory driver.
On Tue, 2 Dec 2008 13:24:11 -0800 Chris Wright [EMAIL PROTECTED] wrote: * Alan Cox ([EMAIL PROTECTED]) wrote: On Tue, 2 Dec 2008 10:07:24 -0800 Chris Wright [EMAIL PROTECTED] wrote: * Alan Cox ([EMAIL PROTECTED]) wrote: + r = !memcmp(old_digest, sha1_item-sha1val, SHA1_DIGEST_SIZE); + mutex_unlock(sha1_lock); + if (r) { + char *old_addr, *new_addr; + old_addr = kmap_atomic(oldpage, KM_USER0); + new_addr = kmap_atomic(newpage, KM_USER1); + r = !memcmp(old_addr+PAGEHASH_LEN, new_addr+PAGEHASH_LEN, + PAGE_SIZE-PAGEHASH_LEN); NAK - this isn't guaranteed to be robust so you could end up merging different pages one provided by a malicious attacker. I presume you're referring to the digest comparison. While there's theoretical concern of hash collision, it's mitigated by hmac(sha1) so the attacker can't brute force for known collisions. Using current known techniques. A random collision is just as bad news. And, just to clarify, your concern would extend to any digest based comparison? Or are you specifically concerned about sha1? Taken off list -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] virtio-pci queue allocation not page-aligned
On Wed, 2008-12-03 at 08:35 +1030, Rusty Russell wrote: On Wednesday 03 December 2008 05:38:21 Hollis Blanchard wrote: I just spent a number of hours tracking this one down, and I'm not too thrilled about it. vp_find_vq() does the memory allocation for virtio PCI rings, and it uses kzalloc() to do it. This is bad because the ring memory *must* be page-aligned. According to Anthony, at the time this code was written, various slab allocators were checked and all happened to return page-aligned buffers. So how did I hit a problem? I had enabled CONFIG_SLUB_DEBUG_ON while investigating an unrelated problem, which offset the address by 64 bytes. One option is to add a BUG_ON(addr & ~PAGE_MASK) to vp_find_vq(). That's better than nothing, but still stinks. It's a bug, we fix it. I've complained before, but since there was no evidence of it actually breaking, I didn't push. Prepare a patch, I'll try to get it in this release. virtio: ring queues must be page-aligned kzalloc() does not guarantee page alignment, and in fact this broke when I enabled CONFIG_SLUB_DEBUG_ON. Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] --- Tested with virtio-blk root filesystem. diff --git a/drivers/virtio/virtio_pci.c b/drivers/virtio/virtio_pci.c --- a/drivers/virtio/virtio_pci.c +++ b/drivers/virtio/virtio_pci.c @@ -237,7 +237,8 @@ static struct virtqueue *vp_find_vq(stru info-queue_index = index; info-num = num; - info-queue = kzalloc(PAGE_ALIGN(vring_size(num)), GFP_KERNEL); + info-queue = alloc_pages_exact(PAGE_ALIGN(vring_size(num)), + GFP_KERNEL|__GFP_ZERO); if (info-queue == NULL) { err = -ENOMEM; goto out_info; -- Hollis Blanchard IBM Linux Technology Center -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Hangs
I have a way to reproduce my instance of the problem easily now. I was trying to build a new kernel on my guest, and found that depmod hangs guests every time. In my case, I only have an amd processor - I don't have an intel host to try it on, right now, but it happens on Ubuntu 8.04 and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships with ubuntu 8.10. I have AMD too, vanilla kernel 2.6.27.6 and kvm-79 (although I have this before 79). the guest is SMP (UP guests hang too but less frequent). depmod does not hang here (not reproducible). Heavy CPU + heavy IO on nfs mounts triggers this on my side. I have a very subjective feeling that it happens more frequently when the host has less uptime (freshly rebooted). -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Hangs
Sounds like your configuration is very similar to mine. I'm also on a vanilla kernel (2.6.27.7 in my case) with kvm-79 and AMD processors. You sparked my curiosity on the depmod -a issue, so I spent some time trying it on different configurations. I have two servers: * 1.8GHz AMD Opteron 2210 on an HP DL385G2, AMD Opteron 2210 * 2.5GHz AMD Athlon 4850e on a green build w/ a Gigabyte GA-MA74Gm-S2 I can reproduce it on the Gigabyte build with ease. On the HP DL385G2 server, I could reproduce it on one of my guests but only once in every four times (and with multiple guests running, not sure if that made a difference). Like you, I also have had the feeling that my occasional hangs were more likely on a fresh reboot, but don't have anything really conclusive to prove it. I'm afraid I don't have an Intel-based server around right now to see if this is an AMD-only issue. I might be able to scrounge up an HP DL380G5 (with an Intel Core 2) but I'm not sure. Chris On Wed, Dec 03, 2008 at 12:01:32AM +0100, xming wrote: I have a way to reproduce my instance of the problem easily now. I was trying to build a new kernel on my guest, and found that depmod hangs guests every time. In my case, I only have an amd processor - I don't have an intel host to try it on, right now, but it happens on Ubuntu 8.04 and Ubuntu 8.10 guests, both using kvm-79 and the version of kvm that ships with ubuntu 8.10. I have AMD too, vanilla kernel 2.6.27.6 and kvm-79 (although I have this before 79). the guest is SMP (UP guests hang too but less frequent). depmod does not hang here (not reproducible). Heavy CPU + heavy IO on nfs mounts triggers this on my side. I have a very subjective feeling that it happens more frequently when the host has less uptime (freshly rebooted). 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [SR-IOV driver example 0/3 resend] introduction
On Tue, Dec 2, 2008 at 1:27 AM, Yu Zhao [EMAIL PROTECTED] wrote: SR-IOV drivers of Intel 82576 NIC are available. There are two parts of the drivers: Physical Function driver and Virtual Function driver. The PF driver is based on the IGB driver and is used to control PF to allocate hardware specific resources and interface with the SR-IOV core. The VF driver is a new NIC driver that is same as the traditional PCI device driver. It works in both the host and the guest (Xen and KVM) environment. These two drivers are testing versions and they are *only* intended to show how to use SR-IOV API. Intel 82576 NIC specification can be found at: http://download.intel.com/design/network/datashts/82576_Datasheet_v2p1.pdf [SR-IOV driver example 0/3 resend] introduction [SR-IOV driver example 1/3 resend] PF driver: hardware specific operations [SR-IOV driver example 2/3 resend] PF driver: integrate with SR-IOV core [SR-IOV driver example 3/3 resend] VF driver: an independent PCI NIC driver -- First of all, we (e1000-devel) do support the SR-IOV API. With that said, NAK on the driver changes. We were not involved in these changes and are currently working on a version of the drivers that will make them acceptable for kernel inclusion. -- Cheers, Jeff -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] Kvm: Qemu: save nvram
Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote: Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote: This patch to save the nvram. It save the nvram by specify the arg of -name. And the saved file named by the arg. If do not specify the arg, it will not save the nvram I think we might be better off having an explicit command line arg for nvram path rather than hardcoding the directory, because there may well be times where you want to have nvram saved, but don't want to specify -name, and vice versa. -nvram foo.data If acceptable for upstream, it should be the best choice. could prepend a default directory of $localstatedir/lib/qemu/nvram, where $localstatedir is set from 'configure' script, or -nvram /some/path/foo.data would use the explicit path given. I prefer current directory if relative path is given. Since we encourage running qemu as an unprivileged user, and we don't want a world-writable directory, each user will have to provide a private storage location. Fine by me - avoids needing to embed any path in QEMU code at all then So we don't need a default path ? Yang, could you address Daniel and Avi's comments in next version ? Xiantao -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote: Hi, this patch series makes the current KVM device passthrough code generic enough so that other IOMMU implementation can also plug into this code. It works by factoring the functions Vt-d code exports to KVM into a generic interface which allows different backends. This is the second version of the patchset. The most important change to the previous version is that this patchset was rebased to the improved API from Han Weidong which supports multiple devices per IOMMU domain. For completeness, this series also includes the patches from Han with some cleanups. So this patchset can be applied on current avi/master tree. Have you tried porting any of the current iommu controllers to this new framework to see if it works properly for them? thanks, greg k-h -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] Kvm: Qemu: save nvram
Ok, I will save it in current directory and don't read nvram from file, if don't specify -nvram. Best Regards --yang -Original Message- From: Zhang, Xiantao Sent: 2008年12月3日 14:02 To: Daniel P. Berrange; Avi Kivity Cc: Zhang, Yang; [EMAIL PROTECTED]; kvm@vger.kernel.org Subject: RE: [PATCH] Kvm: Qemu: save nvram Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 03:01:20PM +0200, Avi Kivity wrote: Daniel P. Berrange wrote: On Tue, Dec 02, 2008 at 10:25:49AM +0800, Zhang, Yang wrote: This patch to save the nvram. It save the nvram by specify the arg of -name.And the saved file named by the arg. If do not specify the arg, it will not save the nvram I think we might be better off having an explicit command line arg for nvram path rather than hardcoding the directory, because there may well be times where you want to have nvram saved, but don't want to specify -name, and vica-verca. -nvram foo.data If acceptable for upstream, it should be the best choice. could prepend a default directory of $localstatedir/lib/qemu/nvram, where $localstatedir is set from 'configure' script, or -nvram /some/path/foo.data would use the explicit path given. I prefer current directory if relative path is given. Since we encourage running qemu as an unprivileged user, and we don't want a world-writable directory, each user will have to provide a private storage location. Fine by me - avoids needing to embed any path in QEMU code at all then So we don't need a default path ? Yang, could you address Daniel and Avi's comments in next version ? Xiantao
[PATCH] register page alignment memory for MMIO of assigned device
MMIO of an assigned device is registered as a memory slot. The size of a memory slot in KVM must be a multiple of the page size. But the MMIO of some devices (e.g. EHCI controller) is not a multiple of the page size, so registering these MMIOs fails, and thus device assignment fails. To solve this, we need to register target-page-aligned memory for these MMIOs. Signed-off-by: Weidong Han [EMAIL PROTECTED] --- qemu/hw/device-assignment.c | 12 +--- 1 files changed, 9 insertions(+), 3 deletions(-) diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c index eaff09e..4a38a22 100644 --- a/qemu/hw/device-assignment.c +++ b/qemu/hw/device-assignment.c @@ -157,12 +157,18 @@ static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, region-e_physbase = e_phys; region-e_size = e_size; -if (!first_map) - kvm_destroy_phys_mem(kvm_context, old_ephys, old_esize); +if (!first_map) { +int slot = get_slot(old_ephys); +if (slot != -1) + kvm_destroy_phys_mem(kvm_context, old_ephys, + TARGET_PAGE_ALIGN(old_esize)); +} if (e_size 0) ret = kvm_register_phys_mem(kvm_context, e_phys, -region-u.r_virtbase, e_size, 0); +region-u.r_virtbase, +TARGET_PAGE_ALIGN(e_size), 0); + if (ret != 0) { fprintf(stderr, %s: Error: create new mapping failed\n, __func__); exit(1); -- 1.5.1 0001-register-page-alignment-memory-for-MMIO-of-assigned.patch Description: 0001-register-page-alignment-memory-for-MMIO-of-assigned.patch
Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote: On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote: Hi, this patch series makes the current KVM device passthrough code generic enough so that other IOMMU implementation can also plug into this code. It works by factoring the functions Vt-d code exports to KVM into a generic interface which allows different backends. This is the second version of the patchset. The most important change to the previous version is that this patchset was rebased to the improved API from Han Weidong which supports multiple devices per IOMMU domain. For completeness, this series also includes the patches from Han with some cleanups. So this patchset can be applied on current avi/master tree. Have you tried porting any of the current iommu controllers to this new framework to see if it works properly for them? It works currently for VT-d. I also port it to AMD IOMMU currently. With some extensions (offset for start address, flags and size limitation) it is also suitable for IOMMUs like GART or similar ones. Joerg -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/12] Factor VT-d KVM functions into a generic API (with multiple device assignment support)
On Wed, Dec 03, 2008 at 08:50:49AM +0100, Joerg Roedel wrote: On Tue, Dec 02, 2008 at 07:44:05PM -0800, Greg KH wrote: On Tue, Dec 02, 2008 at 02:01:11PM +0100, Joerg Roedel wrote: Hi, this patch series makes the current KVM device passthrough code generic enough so that other IOMMU implementation can also plug into this code. It works by factoring the functions Vt-d code exports to KVM into a generic interface which allows different backends. This is the second version of the patchset. The most important change to the previous version is that this patchset was rebased to the improved API from Han Weidong which supports multiple devices per IOMMU domain. For completeness, this series also includes the patches from Han with some cleanups. So this patchset can be applied on current avi/master tree. Have you tried porting any of the current iommu controllers to this new framework to see if it works properly for them? It works currently for VT-d. I also port it to AMD IOMMU currently. With some extensions (offset for start address, flags and size limitation) it is also suitable for IOMMUs like GART or similar ones. What about the Calgary chipset? thanks, greg k-h -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] qemu: ppc: fix build warnings
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] --- qemu/hw/device_tree.c | 14 +++--- qemu/hw/device_tree.h | 12 ++-- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/qemu/hw/device_tree.c b/qemu/hw/device_tree.c index e73129d..2621ff1 100644 --- a/qemu/hw/device_tree.c +++ b/qemu/hw/device_tree.c @@ -31,7 +31,7 @@ /* This function reads device-tree property files that are of * a single cell size */ -uint32_t read_proc_dt_prop_cell(char *path_in_device_tree) +uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree) { char *buf = NULL; int i; @@ -65,7 +65,7 @@ uint32_t read_proc_dt_prop_cell(char *path_in_device_tree) #ifdef CONFIG_LIBFDT /* support functions */ -static int get_offset_of_node(void *fdt, char *node_path) +static int get_offset_of_node(void *fdt, const char *node_path) { int node_offset; node_offset = fdt_path_offset(fdt, node_path); @@ -78,7 +78,7 @@ static int get_offset_of_node(void *fdt, char *node_path) } /* public functions */ -void *load_device_tree(char *filename_path, unsigned long load_addr) +void *load_device_tree(const char *filename_path, unsigned long load_addr) { int dt_file_size; int dt_file_load_size; @@ -134,7 +134,7 @@ fail: return NULL; } -void dump_device_tree_to_file(void *fdt, char *filename) +void dump_device_tree_to_file(void *fdt, const char *filename) { int fd; fd = open(filename, O_RDWR|O_CREAT, O_RDWR); @@ -148,7 +148,7 @@ void dump_device_tree_to_file(void *fdt, char *filename) close(fd); } -void dt_cell(void *fdt, char *node_path, char *property, +void dt_cell(void *fdt, const char *node_path, const char *property, uint32_t val) { int offset; @@ -163,7 +163,7 @@ void dt_cell(void *fdt, char *node_path, char *property, } /* This function is to manipulate a cell with multiple values */ -void dt_cell_multi(void *fdt, char *node_path, char *property, +void dt_cell_multi(void *fdt, const char *node_path, const char *property, uint32_t *val_array, int size) { int offset; @@ -177,7 +177,7 @@ void 
dt_cell_multi(void *fdt, char *node_path, char *property, } } -void dt_string(void *fdt, char *node_path, char *property, +void dt_string(void *fdt, const char *node_path, const char *property, char *string) { int offset; diff --git a/qemu/hw/device_tree.h b/qemu/hw/device_tree.h index 05a81ef..a311309 100644 --- a/qemu/hw/device_tree.h +++ b/qemu/hw/device_tree.h @@ -11,16 +11,16 @@ */ /* device-tree proc support functions */ -uint32_t read_proc_dt_prop_cell(char *path_in_device_tree); +uint32_t read_proc_dt_prop_cell(const char *path_in_device_tree); #ifdef CONFIG_LIBFDT /* device tree functions */ -void *load_device_tree(char *filename_path, target_ulong load_addr); -void dump_device_tree_to_file(void *fdt, char *filename); -void dt_cell(void *fdt, char *node_path, char *property, +void *load_device_tree(const char *filename_path, target_ulong load_addr); +void dump_device_tree_to_file(void *fdt, const char *filename); +void dt_cell(void *fdt, const char *node_path, const char *property, uint32_t val); -void dt_cell_multi(void *fdt, char *node_path, char *property, +void dt_cell_multi(void *fdt, const char *node_path, const char *property, uint32_t *val_array, int size); -void dt_string(void *fdt, char *node_path, char *property, +void dt_string(void *fdt, const char *node_path, const char *property, char *string); #endif -- 1.5.6.5 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] qemu: ppc: fix build after qemu upstream changes
Signed-off-by: Hollis Blanchard [EMAIL PROTECTED] --- qemu/hw/ppc440_bamboo.c | 37 + 1 files changed, 21 insertions(+), 16 deletions(-) diff --git a/qemu/hw/ppc440_bamboo.c b/qemu/hw/ppc440_bamboo.c index bf42245..79e4ea8 100644 --- a/qemu/hw/ppc440_bamboo.c +++ b/qemu/hw/ppc440_bamboo.c @@ -38,13 +38,14 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, qemu_irq *pic; ppc4xx_pci_t *pci; CPUState *env; - uint64_t ep=0; - uint64_t la=0; - int is_linux=1; /* Will assume allways is Linux for now */ - target_long kernel_size=0; - target_ulong initrd_base=0; - target_long initrd_size=0; - target_ulong dt_base=0; +uint64_t elf_entry; +uint64_t elf_lowaddr; + target_ulong entry = 0; + target_ulong loadaddr = 0; + target_long kernel_size = 0; + target_ulong initrd_base = 0; + target_long initrd_size = 0; + target_ulong dt_base = 0; void *fdt; int ret; int ram_stick_sizes[] = {25620, 12820, 6420, @@ -105,20 +106,24 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, /* load kernel with uboot loader */ printf(%s: load kernel\n, __func__); - ret = load_uimage(kernel_filename, ep, la, kernel_size, is_linux); - if (ret 0) - ret = load_elf(kernel_filename, 0, ep, la, NULL); - - if (ret 0) { + kernel_size = load_uimage(kernel_filename, entry, loadaddr, NULL); + if (kernel_size 0) { + kernel_size = load_elf(kernel_filename, 0, elf_entry, elf_lowaddr, + NULL); +entry = elf_entry; +loadaddr = elf_lowaddr; +} + + if (kernel_size 0) { fprintf(stderr, qemu: could not load kernel '%s'\n, kernel_filename); exit(1); } - printf(kernel is at guest address: 0x%lx\n, (unsigned long)la); + printf(kernel is at guest address: 0x%lx\n, (unsigned long)loadaddr); /* load initrd */ if (initrd_filename) { - initrd_base = kernel_size + la; + initrd_base = kernel_size + loadaddr; printf(%s: load initrd\n, __func__); initrd_size = load_image(initrd_filename, phys_ram_base + initrd_base); @@ -156,7 +161,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, if (initrd_base) dt_base = 
initrd_base + initrd_size; else - dt_base = kernel_size + la; + dt_base = kernel_size + loadaddr; fdt = load_device_tree(buf, (unsigned long)(phys_ram_base + dt_base)); if (fdt == NULL) { @@ -188,7 +193,7 @@ void bamboo_init(ram_addr_t ram_size, int vga_ram_size, /* location of device tree in register */ env-gpr[3] = dt_base; #endif - env-nip = ep; + env-nip = entry; } if (pci) { -- 1.5.6.5 -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
powerpc kvm-userspace build fixes
These patches fix the kvm-userspace qemu build after a recent merge with upstream qemu. I'm also seeing a build dependency issue with dyngen-opc.h that I don't see upstream. I haven't sorted that out yet, but running "make qemu/ppcemb-softmmu/dyngen-opc.h" first works around the problem. -Hollis -- To unsubscribe from this list: send the line unsubscribe kvm-ppc in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html