Some comments below. :) On Wednesday 16 July 2008 23:56:50 Ben-Ami Yassour wrote: > From: Amit Shah <[EMAIL PROTECTED]> > > This patch adds support for handling PCI devices that are assigned > to the guest ("PCI passthrough"). > > The device to be assigned to the guest is registered in the host > kernel and interrupt delivery is handled. If a device is already > assigned, or the device driver for it is still loaded on the host, > the device assignment is failed by conveying a -EBUSY reply to the > userspace. > > Devices that share their interrupt line are not supported at the > moment. > > By itself, this patch will not make devices work within the guest. > The VT-d extension is required to enable the device to perform DMA. > Another alternative is PVDMA. > > Signed-off-by: Amit Shah <[EMAIL PROTECTED]> > Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]> > Signed-off-by: Han, Weidong <[EMAIL PROTECTED]> > --- > arch/x86/kvm/x86.c | 267 > ++++++++++++++++++++++++++++++++++++++++++++ > include/asm-x86/kvm_host.h | 37 ++++++ > include/asm-x86/kvm_para.h | 16 +++- > include/linux/kvm.h | 3 + > virt/kvm/ioapic.c | 12 ++- > 5 files changed, 332 insertions(+), 3 deletions(-) > > diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c > index 3167006..65b307d 100644 > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -4,10 +4,12 @@ > * derived from drivers/kvm/kvm_main.c > * > * Copyright (C) 2006 Qumranet, Inc. > + * Copyright (C) 2008 Qumranet, Inc. > * > * Authors: > * Avi Kivity <[EMAIL PROTECTED]> > * Yaniv Kamay <[EMAIL PROTECTED]> > + * Amit Shah <[EMAIL PROTECTED]> > * > * This work is licensed under the terms of the GNU GPL, version > 2. See * the COPYING file in the top-level directory. > @@ -23,8 +25,10 @@ > #include "x86.h" > > #include <linux/clocksource.h> > +#include <linux/interrupt.h> > #include <linux/kvm.h> > #include <linux/fs.h> > +#include <linux/pci.h> > #include <linux/vmalloc.h> > #include <linux/module.h> > #include <linux/mman.h> > @@ -98,6 +102,256 @@ struct kvm_stats_debugfs_item > debugfs_entries[] = { { NULL } > }; > [snip] > + > +static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev) > +{ > + int r = 0; > + struct kvm_pci_pt_dev_list *match; > + struct pci_dev *dev; > + > + write_lock(&kvm_pci_pt_lock); > + > + /* Check if this is a request to update the irq of the device > + * in the guest (BIOS/ kernels can dynamically reprogram irq > + * numbers). This also protects us from adding the same > + * device twice. > + */ > + match = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head, > + &pci_pt_dev->host, 0, KVM_PT_SOURCE_UPDATE); > + if (match) { > + match->pt_dev.guest.irq = pci_pt_dev->guest.irq; > + write_unlock(&kvm_pci_pt_lock); > + goto out; > + } > + write_unlock(&kvm_pci_pt_lock); > + > + match = kzalloc(sizeof(struct kvm_pci_pt_dev_list), GFP_KERNEL); > + if (match == NULL) { > + printk(KERN_INFO "%s: Couldn't allocate memory\n", > + __func__); > + r = -ENOMEM; > + goto out; > + } > + dev = pci_get_bus_and_slot(pci_pt_dev->host.busnr, > + pci_pt_dev->host.devfn); > + if (!dev) { > + printk(KERN_INFO "%s: host device not found\n", __func__); > + r = -EINVAL; > + goto out_free; > + } > + if (pci_enable_device(dev)) { > + printk(KERN_INFO "%s: Could not enable PCI device\n", __func__); > + r = -EBUSY; > + goto out_put; > + } > + r = pci_request_regions(dev, "kvm_pt_device"); > + if (r) { > + printk(KERN_INFO "%s: Could not get access to device regions\n", > + __func__); > + goto out_put;
pci_disable_device()? > + } > + match->pt_dev.guest.busnr = pci_pt_dev->guest.busnr; > + match->pt_dev.guest.devfn = pci_pt_dev->guest.devfn; > + match->pt_dev.host.busnr = pci_pt_dev->host.busnr; > + match->pt_dev.host.devfn = pci_pt_dev->host.devfn; > + match->pt_dev.dev = dev; > + > + write_lock(&kvm_pci_pt_lock); > + > + INIT_WORK(&match->pt_dev.int_work.work, kvm_pci_pt_int_work_fn); > + INIT_WORK(&match->pt_dev.ack_work.work, kvm_pci_pt_ack_work_fn); > + > + match->pt_dev.kvm = kvm; > + match->pt_dev.int_work.pt_dev = &match->pt_dev; > + match->pt_dev.ack_work.pt_dev = &match->pt_dev; > + > + list_add(&match->list, &kvm->arch.pci_pt_dev_head); > + > + write_unlock(&kvm_pci_pt_lock); > + > + if (irqchip_in_kernel(kvm)) { > + match->pt_dev.guest.irq = pci_pt_dev->guest.irq; > + match->pt_dev.host.irq = dev->irq; > + if (kvm->arch.vioapic) > + kvm->arch.vioapic->ack_notifier = kvm_pci_pt_ack_irq; > + if (kvm->arch.vpic) > + kvm->arch.vpic->ack_notifier = kvm_pci_pt_ack_irq; > + > + /* Even though this is PCI, we don't want to use shared > + * interrupts. Sharing host devices with guest-assigned devices > + * on the same interrupt line is not a happy situation: there > + * are going to be long delays in accepting, acking, etc. > + */ > + if (request_irq(dev->irq, kvm_pci_pt_dev_intr, 0, > + "kvm_pt_device", (void *)&match->pt_dev)) { > + printk(KERN_INFO "%s: couldn't allocate irq for pv " > + "device\n", __func__); > + r = -EIO; > + goto out_list_del; > + } PCI memory region has not been freed if request_irq() fail. > + } > + > +out: > + return r; > +out_list_del: > + list_del(&match->list); > +out_put: > + pci_dev_put(dev); > +out_free: > + kfree(match); > + goto out; > +} > + > +static void kvm_free_pci_passthrough(struct kvm *kvm) > +{ > + struct list_head *ptr, *ptr2; > + struct kvm_pci_pt_dev_list *pci_pt_dev; > + > + write_lock(&kvm_pci_pt_lock); > + list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) { > + pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list); > + > + if (irqchip_in_kernel(kvm) && pci_pt_dev->pt_dev.host.irq) > + free_irq(pci_pt_dev->pt_dev.host.irq, > + (void *)&pci_pt_dev->pt_dev); > + > + if (cancel_work_sync(&pci_pt_dev->pt_dev.int_work.work)) > + /* We had pending work. That means we will have to take > + * care of kvm_put_kvm. > + */ > + kvm_put_kvm(kvm); Cancel_work_sync() is might_sleep() within spinlock... -- regards Yang, Sheng -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html