On Tuesday 23 September 2008 22:54:53 Amit Shah wrote:
> From: Or Sagi <[EMAIL PROTECTED]>
> From: Nir Peleg <[EMAIL PROTECTED]>
> From: Amit Shah <[EMAIL PROTECTED]>
> From: Ben-Ami Yassour <[EMAIL PROTECTED]>
> From: Weidong Han <[EMAIL PROTECTED]>
> From: Glauber de Oliveira Costa <[EMAIL PROTECTED]>
>
> With this patch, we can assign a device on the host machine to a
> guest.
>
> A new command-line option, -pcidevice is added.
> For example, to invoke it for a device sitting at PCI bus:dev.fn
> 04:08.0, use this:
>
>         -pcidevice host=04:08.0
>
> * The host driver for the device, if any, is to be removed before
> assigning the device (else device assignment will fail).
>
> * A device that shares IRQ with another host device cannot currently
> be assigned.
>
> This works only with the in-kernel irqchip method; to use the
> userspace irqchip, a kernel module (irqhook) and some extra changes
> are needed.
>

Hi Amit

I'm afraid I got the following errors when trying to enable VT-d:

create_userspace_phys_mem: Invalid argument
assigned_dev_iomem_map: Error: create new mapping failed

Can you have a look at it? (The patch you sent to Weidong doesn't have this
problem.)

Thanks.
--
regards
Yang, Sheng

> Signed-off-by: Amit Shah <[EMAIL PROTECTED]>
> ---
>  qemu/Makefile.target        |    1 +
>  qemu/hw/device-assignment.c |  665
> +++++++++++++++++++++++++++++++++++++++++++ qemu/hw/device-assignment.h |  
> 93 ++++++
>  qemu/hw/pc.c                |    9 +
>  qemu/hw/pci.c               |    7 +
>  qemu/vl.c                   |   18 ++
>  6 files changed, 793 insertions(+), 0 deletions(-)
>  create mode 100644 qemu/hw/device-assignment.c
>  create mode 100644 qemu/hw/device-assignment.h
>
> diff --git a/qemu/Makefile.target b/qemu/Makefile.target
> index 72f3db8..40eb273 100644
> --- a/qemu/Makefile.target
> +++ b/qemu/Makefile.target
> @@ -616,6 +616,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o
>  OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o
>  OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o
>  OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o
> +OBJS+= device-assignment.o
>  ifeq ($(USE_KVM_PIT), 1)
>  OBJS+= i8254-kvm.o
>  endif
> diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c
> new file mode 100644
> index 0000000..e70daf2
> --- /dev/null
> +++ b/qemu/hw/device-assignment.c
> @@ -0,0 +1,665 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + *
> + *  Assign a PCI device from the host to a guest VM.
> + *
> + *  Adapted for KVM by Qumranet.
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
> + */
> +#include <stdio.h>
> +#include <sys/io.h>
> +#include "qemu-kvm.h"
> +#include <linux/kvm_para.h>
> +#include "device-assignment.h"
> +
> +/* From linux/ioport.h */
> +#define IORESOURCE_IO          0x00000100      /* Resource type */
> +#define IORESOURCE_MEM         0x00000200
> +#define IORESOURCE_IRQ         0x00000400
> +#define IORESOURCE_DMA         0x00000800
> +#define IORESOURCE_PREFETCH    0x00001000      /* No side effects */
> +
> +/* #define DEVICE_ASSIGNMENT_DEBUG */
> +
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __func__ , ##
> args) +#else
> +#define DEBUG(fmt, args...)
> +#endif
> +
> +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr,
> +                                      uint32_t value)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (unsigned long)r_access->r_virtbase
> +               + (addr - r_access->e_physbase);
> +
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x"
> +                       " r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       iopl(3);
> +       outb(value, r_pio);
> +}
> +
> +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr,
> +                                      uint32_t value)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (unsigned long)r_access->r_virtbase
> +               + (addr - r_access->e_physbase);
> +
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x"
> +                       " r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       iopl(3);
> +       outw(value, r_pio);
> +}
> +
> +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr,
> +                                      uint32_t value)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (unsigned long)r_access->r_virtbase
> +               + (addr - r_access->e_physbase);
> +
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x"
> +                       " r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       iopl(3);
> +       outl(value, r_pio);
> +}
> +
> +static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t addr)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (addr - r_access->e_physbase)
> +               + (unsigned long)r_access->r_virtbase;
> +       uint32_t value;
> +
> +       iopl(3);
> +       value = inb(r_pio);
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x "
> +                       "r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       return value;
> +}
> +
> +static uint32_t assigned_dev_ioport_readw(void *opaque, uint32_t addr)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (addr - r_access->e_physbase)
> +               + (unsigned long)r_access->r_virtbase;
> +       uint32_t value;
> +
> +       iopl(3);
> +       value = inw(r_pio);
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x "
> +                       "r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       return value;
> +}
> +
> +static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t addr)
> +{
> +       AssignedDevRegion *r_access = (AssignedDevRegion *)opaque;
> +       uint32_t r_pio = (addr - r_access->e_physbase)
> +               + (unsigned long)r_access->r_virtbase;
> +       uint32_t value;
> +
> +       iopl(3);
> +       value = inl(r_pio);
> +       if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) {
> +               fprintf(stderr, "%s: r_pio=%08x e_physbase=%08x "
> +                       "r_virtbase=%08lx value=%08x\n",
> +                       __func__, r_pio, (int)r_access->e_physbase,
> +                       (unsigned long)r_access->r_virtbase, value);
> +       }
> +       return value;
> +}
> +
> +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num,
> +                        uint32_t e_phys, uint32_t e_size, int type)
> +{
> +       AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
> +       AssignedDevRegion *region = &r_dev->v_addrs[region_num];
> +       int first_map = (region->e_size == 0);
> +       int ret = 0;
> +
> +       DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
> +             e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
> +             region_num);
> +
> +       region->e_physbase = e_phys;
> +       region->e_size = e_size;
> +
> +       /* FIXME: Add support for emulated MMIO for non-kvm guests */
> +       if (kvm_enabled()) {
> +               if (!first_map)
> +                       kvm_destroy_phys_mem(kvm_context, e_phys, e_size);
> +               if (e_size > 0)
> +                       ret = kvm_register_phys_mem(kvm_context, e_phys,
> +                                                   region->r_virtbase,
> +                                                   e_size, 0);
> +               if (ret != 0)
> +                       fprintf(stderr,
> +                               "%s: Error: create new mapping failed\n",
> +                               __func__);
> +       }
> +}
> +
> +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num,
> +                                   uint32_t addr, uint32_t size, int type)
> +{
> +       AssignedDevice *r_dev = (AssignedDevice *) pci_dev;
> +
> +       r_dev->v_addrs[region_num].e_physbase = addr;
> +       DEBUG("%s: address=0x%x type=0x%x len=%d region_num=%d \n",
> +             __func__, addr, type, size, region_num);
> +
> +       register_ioport_read(addr, size, 1, assigned_dev_ioport_readb,
> +                            (void *) (r_dev->v_addrs + region_num));
> +       register_ioport_read(addr, size, 2, assigned_dev_ioport_readw,
> +                            (void *) (r_dev->v_addrs + region_num));
> +       register_ioport_read(addr, size, 4, assigned_dev_ioport_readl,
> +                            (void *) (r_dev->v_addrs + region_num));
> +       register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb,
> +                             (void *) (r_dev->v_addrs + region_num));
> +       register_ioport_write(addr, size, 2, assigned_dev_ioport_writew,
> +                             (void *) (r_dev->v_addrs + region_num));
> +       register_ioport_write(addr, size, 4, assigned_dev_ioport_writel,
> +                             (void *) (r_dev->v_addrs + region_num));
> +}
> +
> +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address,
> +                                         uint32_t val, int len)
> +{
> +       int fd, r;
> +
> +       DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n",
> +             __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> +             (uint16_t) address, val, len);
> +
> +       if (address == 0x4) {
> +               pci_default_write_config(d, address, val, len);
> +               /* Continue to program the card */
> +       }
> +
> +       if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> +           address == 0x3c || address == 0x3d) {
> +               /* used for update-mappings (BAR emulation) */
> +               pci_default_write_config(d, address, val, len);
> +               return;
> +       }
> +       DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n",
> +             __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7),
> +             (uint16_t) address, val, len);
> +       fd = ((AssignedDevice *)d)->real_device.config_fd;
> +       r = lseek(fd, address, SEEK_SET);
> +       if (r < 0) {
> +               fprintf(stderr, "%s: bad seek, errno = %d\n",
> +                       __func__, errno);
> +               return;
> +       }
> +again:
> +       r = write(fd, &val, len);
> +       if (r < 0) {
> +               if (errno == EINTR || errno == EAGAIN)
> +                       goto again;
> +               fprintf(stderr, "%s: write failed, errno = %d\n",
> +                       __func__, errno);
> +       }
> +}
> +
> +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t
> address, +                                            int len)
> +{
> +       uint32_t val = 0;
> +       int fd, r;
> +
> +       if ((address >= 0x10 && address <= 0x24) || address == 0x34 ||
> +           address == 0x3c || address == 0x3d) {
> +               val = pci_default_read_config(d, address, len);
> +               DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> +                     (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address,
> val, +                     len);
> +               return val;
> +       }
> +
> +       /* vga specific, remove later */
> +       if (address == 0xFC)
> +               goto do_log;
> +
> +       fd = ((AssignedDevice *)d)->real_device.config_fd;
> +       r = lseek(fd, address, SEEK_SET);
> +       if (r < 0) {
> +               fprintf(stderr, "%s: bad seek, errno = %d\n",
> +                       __func__, errno);
> +               return val;
> +       }
> +again:
> +       r = read(fd, &val, len);
> +       if (r < 0) {
> +               if (errno == EINTR || errno == EAGAIN)
> +                       goto again;
> +               fprintf(stderr, "%s: read failed, errno = %d\n",
> +                       __func__, errno);
> +       }
> +do_log:
> +       DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n",
> +             (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len);
> +
> +       /* kill the special capabilities */
> +       if (address == 4 && len == 4)
> +               val &= ~0x100000;
> +       else if (address == 6)
> +               val &= ~0x10;
> +
> +       return val;
> +}
> +
> +static int assigned_dev_register_regions(PCIRegion *io_regions,
> +                                        unsigned long regions_num,
> +                                        AssignedDevice *pci_dev)
> +{
> +       uint32_t i;
> +       PCIRegion *cur_region = io_regions;
> +
> +       for (i = 0; i < regions_num; i++, cur_region++) {
> +               if (!cur_region->valid)
> +                       continue;
> +#ifdef DEVICE_ASSIGNMENT_DEBUG
> +               pci_dev->v_addrs[i].debug |= DEVICE_ASSIGNMENT_DEBUG_MMIO
> +                                            | DEVICE_ASSIGNMENT_DEBUG_PIO;
> +#endif
> +               pci_dev->v_addrs[i].num = i;
> +
> +               /* handle memory io regions */
> +               if (cur_region->type & IORESOURCE_MEM) {
> +                       int t = cur_region->type & IORESOURCE_PREFETCH
> +                               ? PCI_ADDRESS_SPACE_MEM_PREFETCH
> +                               : PCI_ADDRESS_SPACE_MEM;
> +
> +                       /* map physical memory */
> +                       pci_dev->v_addrs[i].e_physbase =
> cur_region->base_addr; +                      
> pci_dev->v_addrs[i].r_virtbase =
> +                               mmap(NULL,
> +                                    (cur_region->size + 0xFFF) &
> 0xFFFFF000, +                                    PROT_WRITE | PROT_READ,
> MAP_SHARED, +                                    cur_region->resource_fd,
> (off_t) 0); +
> +                       if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase)
> { +                               fprintf(stderr, "%s: Error: Couldn't mmap
> 0x%x!" +                                       "\n", __func__,
> +                                       (uint32_t)
> (cur_region->base_addr)); +                               return -1;
> +                       }
> +                       pci_dev->v_addrs[i].r_size = cur_region->size;
> +                       pci_dev->v_addrs[i].e_size = 0;
> +
> +                       /* add offset */
> +                       pci_dev->v_addrs[i].r_virtbase +=
> +                               (cur_region->base_addr & 0xFFF);
> +
> +                       pci_register_io_region((PCIDevice *) pci_dev, i,
> +                                              cur_region->size, t,
> +                                              assigned_dev_iomem_map);
> +                       continue;
> +               }
> +               /* handle port io regions */
> +               pci_register_io_region((PCIDevice *) pci_dev, i,
> +                                      cur_region->size,
> PCI_ADDRESS_SPACE_IO, +                                     
> assigned_dev_ioport_map);
> +
> +               pci_dev->v_addrs[i].e_physbase = cur_region->base_addr;
> +               pci_dev->v_addrs[i].r_virtbase =
> +                       (void *)(long)cur_region->base_addr;
> +               /* not relevant for port io */
> +               pci_dev->v_addrs[i].memory_index = 0;
> +       }
> +
> +       /* success */
> +       return 0;
> +}
> +
> +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus,
> +                          uint8_t r_dev, uint8_t r_func)
> +{
> +       char dir[128], name[128], comp[16];
> +       int fd, r = 0;
> +       FILE *f;
> +       unsigned long long start, end, size, flags;
> +       PCIRegion *rp;
> +       PCIDevRegions *dev = &pci_dev->real_device;
> +
> +       dev->region_number = 0;
> +
> +       sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/",
> +               r_bus, r_dev, r_func);
> +       strcpy(name, dir);
> +       strcat(name, "config");
> +       fd = open(name, O_RDWR);
> +       if (fd == -1) {
> +               fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +               return 1;
> +       }
> +       dev->config_fd = fd;
> +again:
> +       r = read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config);
> +       if (r < 0) {
> +               if (errno == EINTR || errno == EAGAIN)
> +                       goto again;
> +               fprintf(stderr, "%s: read failed, errno = %d\n",
> +                       __func__, errno);
> +       }
> +       strcpy(name, dir);
> +       strcat(name, "resource");
> +
> +       f = fopen(name, "r");
> +       if (f == NULL) {
> +               fprintf(stderr, "%s: %s: %m\n", __func__, name);
> +               return 1;
> +       }
> +       for (r = 0; fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) ==
> 3; +            r++) {
> +               rp = dev->regions + r;
> +               rp->valid = 0;
> +               size = end - start + 1;
> +               flags &= IORESOURCE_IO | IORESOURCE_MEM |
> IORESOURCE_PREFETCH; +               if (size == 0 || (flags &
> ~IORESOURCE_PREFETCH) == 0) +                       continue;
> +               if (flags & IORESOURCE_MEM) {
> +                       flags &= ~IORESOURCE_IO;
> +                       sprintf(comp, "resource%d", r);
> +                       strcpy(name, dir);
> +                       strcat(name, comp);
> +                       fd = open(name, O_RDWR);
> +                       if (fd == -1)
> +                               continue;               /* probably ROM */
> +                       rp->resource_fd = fd;
> +               } else
> +                       flags &= ~IORESOURCE_PREFETCH;
> +
> +               rp->type = flags;
> +               rp->valid = 1;
> +               rp->base_addr = start;
> +               rp->size = size;
> +               DEBUG("%s: region %d size %d start 0x%x type %d "
> +                     "resource_fd %d\n", __func__, r, rp->size, start,
> +                     rp->type, rp->resource_fd);
> +       }
> +       fclose(f);
> +
> +       dev->region_number = r;
> +       return 0;
> +}
> +
> +#define        MAX_ASSIGNED_DEVS 4
> +struct {
> +       char name[15];
> +       int bus;
> +       int dev;
> +       int func;
> +       AssignedDevice *assigned_dev;
> +} assigned_devices[MAX_ASSIGNED_DEVS];
> +
> +int nr_assigned_devices;
> +static int disable_iommu;
> +
> +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn)
> +{
> +       return (uint32_t)bus << 8 | (uint32_t)devfn;
> +}
> +
> +static AssignedDevice *register_real_device(PCIBus *e_bus,
> +                                           const char *e_dev_name,
> +                                           int e_devfn, uint8_t r_bus,
> +                                           uint8_t r_dev, uint8_t r_func)
> +{
> +       int r;
> +       AssignedDevice *pci_dev;
> +       uint8_t e_device, e_intx;
> +
> +       DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n",
> +             __func__, e_dev_name, e_devfn);
> +
> +       pci_dev = (AssignedDevice *)
> +               pci_register_device(e_bus, e_dev_name,
> sizeof(AssignedDevice), +                                   e_devfn,
> assigned_dev_pci_read_config, +                                  
> assigned_dev_pci_write_config); +       if (NULL == pci_dev) {
> +               fprintf(stderr, "%s: Error: Couldn't register real device
> %s\n", +                       __func__, e_dev_name);
> +               return NULL;
> +       }
> +       if (get_real_device(pci_dev, r_bus, r_dev, r_func)) {
> +               fprintf(stderr, "%s: Error: Couldn't get real device
> (%s)!\n", +                       __func__, e_dev_name);
> +               goto out;
> +       }
> +
> +       /* handle real device's MMIO/PIO BARs */
> +       if (assigned_dev_register_regions(pci_dev->real_device.regions,
> +                                        
> pci_dev->real_device.region_number, +                                      
>   pci_dev))
> +               goto out;
> +
> +       /* handle interrupt routing */
> +       e_device = (pci_dev->dev.devfn >> 3) & 0x1f;
> +       e_intx = pci_dev->dev.config[0x3d] - 1;
> +       pci_dev->intpin = e_intx;
> +       pci_dev->run = 0;
> +       pci_dev->girq = 0;
> +       pci_dev->h_busnr = r_bus;
> +       pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +       if (kvm_enabled()) {
> +               struct kvm_assigned_pci_dev assigned_dev_data;
> +
> +               memset(&assigned_dev_data, 0, sizeof(assigned_dev_data));
> +               assigned_dev_data.assigned_dev_id  =
> +                       calc_assigned_dev_id(pci_dev->h_busnr,
> +                                            (uint32_t)pci_dev->h_devfn);
> +               assigned_dev_data.busnr = pci_dev->h_busnr;
> +               assigned_dev_data.devfn = pci_dev->h_devfn;
> +
> +#ifdef KVM_CAP_IOMMU
> +               /* We always enable the IOMMU if present
> +                * (or when not disabled on the command line)
> +                */
> +               r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU);
> +               if (r && !disable_iommu)
> +                       assigned_dev_data.flags |=
> KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif
> +               r = kvm_assign_pci_device(kvm_context, &assigned_dev_data);
> +               if (r < 0) {
> +                       fprintf(stderr, "Could not notify kernel about "
> +                               "assigned device \"%s\"\n", e_dev_name);
> +                       perror("pt-ioctl");
> +                       goto out;
> +               }
> +       }
> +#endif
> +       fprintf(stderr, "Registered host PCI device %02x:%02x.%1x "
> +               "(\"%s\") as guest device %02x:%02x.%1x\n",
> +               r_bus, r_dev, r_func, e_dev_name,
> +               pci_bus_num(e_bus), e_device, r_func);
> +
> +       return pci_dev;
> +out:
> +       pci_unregister_device(&pci_dev->dev);
> +       return NULL;
> +}
> +
> +extern int get_param_value(char *buf, int buf_size,
> +                          const char *tag, const char *str);
> +extern int piix_get_irq(int);
> +
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +/* The pci config space got updated. Check if irq numbers have changed
> + * for our devices
> + */
> +void assigned_dev_update_irq(PCIDevice *d)
> +{
> +       int i, irq, r;
> +       AssignedDevice *assigned_dev;
> +
> +       for (i = 0; i < nr_assigned_devices; i++) {
> +               assigned_dev = assigned_devices[i].assigned_dev;
> +               if (assigned_dev == NULL)
> +                       continue;
> +
> +               irq = pci_map_irq(&assigned_dev->dev,
> assigned_dev->intpin); +               irq = piix_get_irq(irq);
> +
> +               if (irq != assigned_dev->girq) {
> +                       struct kvm_assigned_irq assigned_irq_data;
> +
> +                       memset(&assigned_irq_data, 0, sizeof
> assigned_irq_data); +                      
> assigned_irq_data.assigned_dev_id  =
> +                               calc_assigned_dev_id(assigned_dev->h_busnr,
> +                                                    (uint8_t)
> +                                                   
> assigned_dev->h_devfn); +                       assigned_irq_data.guest_irq
> = irq;
> +                       assigned_irq_data.host_irq =
> +                               assigned_dev->real_device.irq;
> +                       r = kvm_assign_irq(kvm_context,
> &assigned_irq_data); +                       if (r < 0) {
> +                               perror("assigned_dev_update_irq");
> +                               fprintf(stderr, "Are you assigning a device
> " +                                       "that shares IRQ with some other
> " +                                       "device?\n");
> +                               pci_unregister_device(&assigned_dev->dev);
> +                               continue;
> +                       }
> +                       assigned_dev->girq = irq;
> +               }
> +       }
> +}
> +#endif
> +
> +static int init_device_assignment(void)
> +{
> +       /* Do we have any devices to be assigned? */
> +       if (nr_assigned_devices == 0)
> +               return -1;
> +       iopl(3);
> +       return 0;
> +}
> +
> +struct PCIDevice *init_assigned_device(PCIBus *bus, int *index)
> +{
> +       AssignedDevice *dev = NULL;
> +       int i;
> +
> +       if (*index == -1) {
> +               if (init_device_assignment() < 0)
> +                       return NULL;
> +
> +               *index = nr_assigned_devices - 1;
> +       }
> +       i = *index;
> +       dev = register_real_device(bus, assigned_devices[i].name, -1,
> +                                  assigned_devices[i].bus,
> +                                  assigned_devices[i].dev,
> +                                  assigned_devices[i].func);
> +       if (dev == NULL) {
> +               fprintf(stderr, "Error: Couldn't register device \"%s\"\n",
> +                       assigned_devices[i].name);
> +       }
> +       assigned_devices[i].assigned_dev = dev;
> +
> +       --*index;
> +       return &dev->dev;
> +}
> +
> +/*
> + * Syntax to assign device:
> + *
> + * -pcidevice dev=bus:dev.func,dma=dma
> + *
> + * Example:
> + * -pcidevice host=00:13.0,dma=pvdma
> + *
> + * dma can currently only be 'none' to disable iommu support.
> + */
> +void add_assigned_device(const char *arg)
> +{
> +       char *cp, *cp1;
> +       char device[8];
> +       char dma[6];
> +       int r;
> +
> +       if (nr_assigned_devices >= MAX_ASSIGNED_DEVS) {
> +               fprintf(stderr, "Too many assigned devices (max %d)\n",
> +                       MAX_ASSIGNED_DEVS);
> +               return;
> +       }
> +       memset(&assigned_devices[nr_assigned_devices], 0,
> +              sizeof assigned_devices[nr_assigned_devices]);
> +
> +       r = get_param_value(device, sizeof device, "host", arg);
> +
> +       r = get_param_value(assigned_devices[nr_assigned_devices].name,
> +                           sizeof
> assigned_devices[nr_assigned_devices].name, +                          
> "name", arg);
> +       if (!r)
> +               strncpy(assigned_devices[nr_assigned_devices].name, device,
> 8); +
> +#ifdef KVM_CAP_IOMMU
> +       r = get_param_value(dma, sizeof dma, "dma", arg);
> +       if (r && !strncmp(dma, "none", 4))
> +               disable_iommu = 1;
> +#endif
> +       cp = device;
> +       assigned_devices[nr_assigned_devices].bus = strtoul(cp, &cp1, 16);
> +       if (*cp1 != ':')
> +               goto bad;
> +       cp = cp1 + 1;
> +
> +       assigned_devices[nr_assigned_devices].dev = strtoul(cp, &cp1, 16);
> +       if (*cp1 != '.')
> +               goto bad;
> +       cp = cp1 + 1;
> +
> +       assigned_devices[nr_assigned_devices].func = strtoul(cp, &cp1, 16);
> +
> +       nr_assigned_devices++;
> +       return;
> +bad:
> +       fprintf(stderr, "pcidevice argument parse error; "
> +               "please check the help text for usage\n");
> +}
> diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h
> new file mode 100644
> index 0000000..b77e484
> --- /dev/null
> +++ b/qemu/hw/device-assignment.h
> @@ -0,0 +1,93 @@
> +/*
> + * Copyright (c) 2007, Neocleus Corporation.
> + * Copyright (c) 2007, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
> + * Place - Suite 330, Boston, MA 02111-1307 USA.
> + *
> + *  Data structures for storing PCI state
> + *
> + *  Adapted to kvm by Qumranet
> + *
> + *  Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED])
> + *  Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED])
> + *  Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED])
> + */
> +
> +#ifndef __DEVICE_ASSIGNMENT_H__
> +#define __DEVICE_ASSIGNMENT_H__
> +
> +#include <sys/mman.h>
> +#include "qemu-common.h"
> +#include "pci.h"
> +#include <linux/types.h>
> +
> +#define DEVICE_ASSIGNMENT_DEBUG_PIO    (0x01)
> +#define DEVICE_ASSIGNMENT_DEBUG_MMIO   (0x02)
> +
> +/* From include/linux/pci.h in the kernel sources */
> +#define PCI_DEVFN(slot, func)  ((((slot) & 0x1f) << 3) | ((func) & 0x07))
> +
> +typedef uint32_t pciaddr_t;
> +
> +#define MAX_IO_REGIONS                 (6)
> +
> +typedef struct pci_region_s {
> +       int type;       /* Memory or port I/O */
> +       int valid;
> +       pciaddr_t base_addr;
> +       pciaddr_t size;         /* size of the region */
> +       int resource_fd;
> +} PCIRegion;
> +
> +typedef struct pci_dev_s {
> +       uint8_t bus, dev, func; /* Bus inside domain, device and function
> */ +       int irq;                /* IRQ number */
> +       uint16_t region_number; /* number of active regions */
> +
> +       /* Port I/O or MMIO Regions */
> +       PCIRegion regions[MAX_IO_REGIONS];
> +       int config_fd;
> +} PCIDevRegions;
> +
> +typedef struct assigned_dev_region_s {
> +       target_phys_addr_t e_physbase;
> +       uint32_t memory_index;
> +       void *r_virtbase;       /* mmapped access address */
> +       int num;                /* our index within v_addrs[] */
> +       uint32_t e_size;        /* emulated size of region in bytes */
> +       uint32_t r_size;        /* real size of region in bytes */
> +       uint32_t debug;
> +} AssignedDevRegion;
> +
> +typedef struct assigned_dev_s {
> +       PCIDevice dev;
> +       int intpin;
> +       uint8_t debug_flags;
> +       AssignedDevRegion v_addrs[PCI_NUM_REGIONS];
> +       PCIDevRegions real_device;
> +       int run;
> +       int girq;
> +       unsigned char h_busnr;
> +       unsigned int h_devfn;
> +       int bound;
> +} AssignedDevice;
> +
> +/* Initialization functions */
> +PCIDevice *init_assigned_device(PCIBus *bus, int *index);
> +void add_assigned_device(const char *arg);
> +void assigned_dev_set_vector(int irq, int vector);
> +void assigned_dev_ack_mirq(int vector);
> +
> +#endif                         /* __DEVICE_ASSIGNMENT_H__ */
> diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c
> index 6053103..4a611cc 100644
> --- a/qemu/hw/pc.c
> +++ b/qemu/hw/pc.c
> @@ -32,6 +32,7 @@
>  #include "smbus.h"
>  #include "boards.h"
>  #include "console.h"
> +#include "device-assignment.h"
>
>  #include "qemu-kvm.h"
>
> @@ -1006,6 +1007,14 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size,
>          }
>      }
>
> +    /* Initialize assigned devices */
> +    if (pci_enabled) {
> +        int r = -1;
> +        do {
> +            init_assigned_device(pci_bus, &r);
> +       } while (r >= 0);
> +    }
> +
>      rtc_state = rtc_init(0x70, i8259[8]);
>
>      qemu_register_boot_set(pc_boot_set, rtc_state);
> diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c
> index 61ff0f6..e4e8386 100644
> --- a/qemu/hw/pci.c
> +++ b/qemu/hw/pci.c
> @@ -50,6 +50,7 @@ struct PCIBus {
>
>  static void pci_update_mappings(PCIDevice *d);
>  static void pci_set_irq(void *opaque, int irq_num, int level);
> +void assigned_dev_update_irq(PCIDevice *d);
>
>  target_phys_addr_t pci_mem_base;
>  static int pci_irq_index;
> @@ -453,6 +454,12 @@ void pci_default_write_config(PCIDevice *d,
>          val >>= 8;
>      }
>
> +#ifdef KVM_CAP_DEVICE_ASSIGNMENT
> +    if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() &&
> +       address >= 0x60 && address <= 0x63)
> +       assigned_dev_update_irq(d);
> +#endif
> +
>      end = address + len;
>      if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) {
>          /* if the command register is modified, we must modify the mappings */
> diff --git a/qemu/vl.c b/qemu/vl.c
> index 2fb8552..83f28c5 100644
> --- a/qemu/vl.c
> +++ b/qemu/vl.c
> @@ -37,6 +37,7 @@
>  #include "qemu-char.h"
>  #include "block.h"
>  #include "audio/audio.h"
> +#include "hw/device-assignment.h"
>  #include "migration.h"
>  #include "balloon.h"
>  #include "qemu-kvm.h"
> @@ -8469,6 +8470,12 @@ static void help(int exitcode)
>  #endif
>            "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n"
>            "-no-kvm-pit     disable KVM kernel mode PIT\n"
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
> +          "-pcidevice host=bus:dev.func[,dma=none][,name=\"string\"]\n"
> +          "                expose a PCI device to the guest OS.\n"
> +          "                dma=none: don't perform any dma translations (default is to use an iommu)\n"
> +          "                'string' is used in log output.\n"
> +#endif
>  #endif
>  #ifdef TARGET_I386
>             "-std-vga        simulate a standard VGA card with VESA Bochs Extensions\n"
> @@ -8592,6 +8599,9 @@ enum {
>      QEMU_OPTION_no_kvm,
>      QEMU_OPTION_no_kvm_irqchip,
>      QEMU_OPTION_no_kvm_pit,
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
> +    QEMU_OPTION_pcidevice,
> +#endif
>      QEMU_OPTION_no_reboot,
>      QEMU_OPTION_no_shutdown,
>      QEMU_OPTION_show_cursor,
> @@ -8680,6 +8690,9 @@ const QEMUOption qemu_options[] = {
>  #endif
>      { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip },
>      { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit },
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
> +    { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice },
> +#endif
>  #endif
>  #if defined(TARGET_PPC) || defined(TARGET_SPARC)
>      { "g", 1, QEMU_OPTION_g },
> @@ -9586,6 +9599,11 @@ int main(int argc, char **argv)
>                 kvm_pit = 0;
>                 break;
>             }
> +#if defined(TARGET_I386) || defined(TARGET_X86_64) || defined(__linux__)
> +           case QEMU_OPTION_pcidevice:
> +               add_assigned_device(optarg);
> +               break;
> +#endif
>  #endif
>              case QEMU_OPTION_usb:
>                  usb_enabled = 1;
> --
> 1.5.4.3
>
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to [EMAIL PROTECTED]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to