Some comments below. :)

On Wednesday 16 July 2008 23:56:50 Ben-Ami Yassour wrote:
> From: Amit Shah <[EMAIL PROTECTED]>
>
> This patch adds support for handling PCI devices that are assigned
> to the guest ("PCI passthrough").
>
> The device to be assigned to the guest is registered in the host
> kernel and interrupt delivery is handled. If a device is already
> assigned, or the device driver for it is still loaded on the host,
> the device assignment is failed by conveying a -EBUSY reply to the
> userspace.
>
> Devices that share their interrupt line are not supported at the
> moment.
>
> By itself, this patch will not make devices work within the guest.
> The VT-d extension is required to enable the device to perform DMA.
> Another alternative is PVDMA.
>
> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
> Signed-off-by: Han, Weidong <[EMAIL PROTECTED]>
> ---
>  arch/x86/kvm/x86.c         |  267
> ++++++++++++++++++++++++++++++++++++++++++++
> include/asm-x86/kvm_host.h |   37 ++++++
>  include/asm-x86/kvm_para.h |   16 +++-
>  include/linux/kvm.h        |    3 +
>  virt/kvm/ioapic.c          |   12 ++-
>  5 files changed, 332 insertions(+), 3 deletions(-)
>
> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
> index 3167006..65b307d 100644
> --- a/arch/x86/kvm/x86.c
> +++ b/arch/x86/kvm/x86.c
> @@ -4,10 +4,12 @@
>   * derived from drivers/kvm/kvm_main.c
>   *
>   * Copyright (C) 2006 Qumranet, Inc.
> + * Copyright (C) 2008 Qumranet, Inc.
>   *
>   * Authors:
>   *   Avi Kivity   <[EMAIL PROTECTED]>
>   *   Yaniv Kamay  <[EMAIL PROTECTED]>
> + *   Amit Shah    <[EMAIL PROTECTED]>
>   *
>   * This work is licensed under the terms of the GNU GPL, version
> 2.  See * the COPYING file in the top-level directory.
> @@ -23,8 +25,10 @@
>  #include "x86.h"
>
>  #include <linux/clocksource.h>
> +#include <linux/interrupt.h>
>  #include <linux/kvm.h>
>  #include <linux/fs.h>
> +#include <linux/pci.h>
>  #include <linux/vmalloc.h>
>  #include <linux/module.h>
>  #include <linux/mman.h>
> @@ -98,6 +102,256 @@ struct kvm_stats_debugfs_item
> debugfs_entries[] = { { NULL }
>  };
>
[snip]
> +
> +static int kvm_vm_ioctl_pci_pt_dev(struct kvm *kvm,
> +                                struct kvm_pci_passthrough_dev *pci_pt_dev)
> +{
> +     int r = 0;
> +     struct kvm_pci_pt_dev_list *match;
> +     struct pci_dev *dev;
> +
> +     write_lock(&kvm_pci_pt_lock);
> +
> +     /* Check if this is a request to update the irq of the device
> +      * in the guest (BIOS/ kernels can dynamically reprogram irq
> +      * numbers).  This also protects us from adding the same
> +      * device twice.
> +      */
> +     match = kvm_find_pci_pt_dev(&kvm->arch.pci_pt_dev_head,
> +                                 &pci_pt_dev->host, 0, KVM_PT_SOURCE_UPDATE);
> +     if (match) {
> +             match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
> +             write_unlock(&kvm_pci_pt_lock);
> +             goto out;
> +     }
> +     write_unlock(&kvm_pci_pt_lock);
> +
> +     match = kzalloc(sizeof(struct kvm_pci_pt_dev_list), GFP_KERNEL);
> +     if (match == NULL) {
> +             printk(KERN_INFO "%s: Couldn't allocate memory\n",
> +                    __func__);
> +             r = -ENOMEM;
> +             goto out;
> +     }
> +     dev = pci_get_bus_and_slot(pci_pt_dev->host.busnr,
> +                                pci_pt_dev->host.devfn);
> +     if (!dev) {
> +             printk(KERN_INFO "%s: host device not found\n", __func__);
> +             r = -EINVAL;
> +             goto out_free;
> +     }
> +     if (pci_enable_device(dev)) {
> +             printk(KERN_INFO "%s: Could not enable PCI device\n", __func__);
> +             r = -EBUSY;
> +             goto out_put;
> +     }
> +     r = pci_request_regions(dev, "kvm_pt_device");
> +     if (r) {
> +             printk(KERN_INFO "%s: Could not get access to device regions\n",
> +                    __func__);
> +             goto out_put;

pci_disable_device()?

> +     }
> +     match->pt_dev.guest.busnr = pci_pt_dev->guest.busnr;
> +     match->pt_dev.guest.devfn = pci_pt_dev->guest.devfn;
> +     match->pt_dev.host.busnr = pci_pt_dev->host.busnr;
> +     match->pt_dev.host.devfn = pci_pt_dev->host.devfn;
> +     match->pt_dev.dev = dev;
> +
> +     write_lock(&kvm_pci_pt_lock);
> +
> +     INIT_WORK(&match->pt_dev.int_work.work, kvm_pci_pt_int_work_fn);
> +     INIT_WORK(&match->pt_dev.ack_work.work, kvm_pci_pt_ack_work_fn);
> +
> +     match->pt_dev.kvm = kvm;
> +     match->pt_dev.int_work.pt_dev = &match->pt_dev;
> +     match->pt_dev.ack_work.pt_dev = &match->pt_dev;
> +
> +     list_add(&match->list, &kvm->arch.pci_pt_dev_head);
> +
> +     write_unlock(&kvm_pci_pt_lock);
> +
> +     if (irqchip_in_kernel(kvm)) {
> +             match->pt_dev.guest.irq = pci_pt_dev->guest.irq;
> +             match->pt_dev.host.irq = dev->irq;
> +             if (kvm->arch.vioapic)
> +                     kvm->arch.vioapic->ack_notifier = kvm_pci_pt_ack_irq;
> +             if (kvm->arch.vpic)
> +                     kvm->arch.vpic->ack_notifier = kvm_pci_pt_ack_irq;
> +
> +             /* Even though this is PCI, we don't want to use shared
> +              * interrupts. Sharing host devices with guest-assigned devices
> +              * on the same interrupt line is not a happy situation: there
> +              * are going to be long delays in accepting, acking, etc.
> +              */
> +             if (request_irq(dev->irq, kvm_pci_pt_dev_intr, 0,
> +                             "kvm_pt_device", (void *)&match->pt_dev)) {
> +                     printk(KERN_INFO "%s: couldn't allocate irq for pv "
> +                            "device\n", __func__);
> +                     r = -EIO;
> +                     goto out_list_del;
> +             }

PCI memory region has not been freed if request_irq() fail.

> +     }
> +
> +out:
> +     return r;
> +out_list_del:
> +     list_del(&match->list);
> +out_put:
> +     pci_dev_put(dev);
> +out_free:
> +     kfree(match);
> +     goto out;
> +}
> +
> +static void kvm_free_pci_passthrough(struct kvm *kvm)
> +{
> +     struct list_head *ptr, *ptr2;
> +     struct kvm_pci_pt_dev_list *pci_pt_dev;
> +
> +     write_lock(&kvm_pci_pt_lock);
> +     list_for_each_safe(ptr, ptr2, &kvm->arch.pci_pt_dev_head) {
> +             pci_pt_dev = list_entry(ptr, struct kvm_pci_pt_dev_list, list);
> +
> +             if (irqchip_in_kernel(kvm) && pci_pt_dev->pt_dev.host.irq)
> +                     free_irq(pci_pt_dev->pt_dev.host.irq,
> +                              (void *)&pci_pt_dev->pt_dev);
> +
> +             if (cancel_work_sync(&pci_pt_dev->pt_dev.int_work.work))
> +                     /* We had pending work. That means we will have to take
> +                      * care of kvm_put_kvm.
> +                      */
> +                     kvm_put_kvm(kvm);

Cancel_work_sync() is might_sleep() within spinlock...

-- 
regards
Yang, Sheng
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to