Amit Shah wrote: > This patch has been contributed to by the following people: > > From: Or Sagi <[EMAIL PROTECTED]> > From: Nir Peleg <[EMAIL PROTECTED]> > From: Amit Shah <[EMAIL PROTECTED]> > From: Ben-Ami Yassour <[EMAIL PROTECTED]> > From: Weidong Han <[EMAIL PROTECTED]> > From: Glauber de Oliveira Costa <[EMAIL PROTECTED]> > > With this patch, we can assign a device on the host machine to a > guest. > > A new command-line option, -pcidevice is added. > To invoke it for a device sitting at PCI bus:dev.fn 04:08.0, use this: > > -pcidevice host=04:08.0 > > * The host driver for the device, if any, is to be removed before > assigning the device (else device assignment will fail). > > * A device that shares IRQ with another host device cannot currently > be assigned. > > * The RAW_IO capability is needed for this to work > > This works only with the in-kernel irqchip method; to use the > userspace irqchip, a kernel module (irqhook) and some extra changes > are needed. > > Signed-off-by: Amit Shah <[EMAIL PROTECTED]> > --- > qemu/Makefile.target | 1 + > qemu/hw/device-assignment.c | 619 > +++++++++++++++++++++++++++++++++++++++++++ > qemu/hw/device-assignment.h | 98 +++++++ qemu/hw/pc.c > | 6 + qemu/hw/pci.c | 7 + > qemu/vl.c | 18 ++ > 6 files changed, 749 insertions(+), 0 deletions(-) > create mode 100644 qemu/hw/device-assignment.c > create mode 100644 qemu/hw/device-assignment.h > > diff --git a/qemu/Makefile.target b/qemu/Makefile.target > index d9bdeca..05a1d84 100644 > --- a/qemu/Makefile.target > +++ b/qemu/Makefile.target > @@ -621,6 +621,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o > OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o > OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o > OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o > +OBJS+= device-assignment.o > ifeq ($(USE_KVM_PIT), 1) > OBJS+= i8254-kvm.o > endif > diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c > new file mode 100644 > 
index 0000000..5ba21a0 > --- /dev/null > +++ b/qemu/hw/device-assignment.c > @@ -0,0 +1,619 @@ > +/* > + * Copyright (c) 2007, Neocleus Corporation. > + * > + * This program is free software; you can redistribute it and/or > modify it + * under the terms and conditions of the GNU General > Public License, + * version 2, as published by the Free Software > Foundation. + * > + * This program is distributed in the hope it will be useful, but > WITHOUT + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > General Public License for + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with + * this program; if not, write to the Free Software > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA > 02111-1307 USA. + * > + * > + * Assign a PCI device from the host to a guest VM. > + * > + * Adapted for KVM by Qumranet. > + * > + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) > + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED]) > + */ > +#include <stdio.h> > +#include <sys/io.h> > +#include "qemu-kvm.h" > +#include "hw.h" > +#include "pc.h" > +#include "sysemu.h" > +#include "console.h" > +#include <linux/kvm_para.h> > +#include "device-assignment.h" > + > +/* From linux/ioport.h */ > +#define IORESOURCE_IO 0x00000100 /* Resource type */ > +#define IORESOURCE_MEM 0x00000200 > +#define IORESOURCE_IRQ 0x00000400 > +#define IORESOURCE_DMA 0x00000800 > +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ > + > +/* #define DEVICE_ASSIGNMENT_DEBUG 1 */ > + > +#ifdef DEVICE_ASSIGNMENT_DEBUG > +#define DEBUG(fmt, args...) \ > + do { \ > + fprintf(stderr, "%s: " fmt, __func__ , ## args); \ > + } while (0) > +#else > +#define DEBUG(fmt, args...) 
do { } while(0) > +#endif > + > +static void assigned_dev_ioport_writeb(void *opaque, uint32_t addr, > + uint32_t value) > +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > + + (addr - r_access->e_physbase); > + > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x" > + " r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + outb(value, r_pio); > +} > + > +static void assigned_dev_ioport_writew(void *opaque, uint32_t addr, > + uint32_t value) > +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > + + (addr - r_access->e_physbase); > + > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x" > + " r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + outw(value, r_pio); > +} > + > +static void assigned_dev_ioport_writel(void *opaque, uint32_t addr, > + uint32_t value) > +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (unsigned long)r_access->r_virtbase > + + (addr - r_access->e_physbase); > + > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x" > + " r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + outl(value, r_pio); > +} > + > +static uint32_t assigned_dev_ioport_readb(void *opaque, uint32_t > addr) +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (addr - r_access->e_physbase) > + + (unsigned long)r_access->r_virtbase; > + uint32_t value; > + > + value = inb(r_pio); > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x " > + "r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + return value; > +} > + > +static uint32_t 
assigned_dev_ioport_readw(void *opaque, uint32_t > addr) +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (addr - r_access->e_physbase) > + + (unsigned long)r_access->r_virtbase; > + uint32_t value; > + > + value = inw(r_pio); > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x " > + "r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + return value; > +} > + > +static uint32_t assigned_dev_ioport_readl(void *opaque, uint32_t > addr) +{ > + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; > + uint32_t r_pio = (addr - r_access->e_physbase) > + + (unsigned long)r_access->r_virtbase; > + uint32_t value; > + > + value = inl(r_pio); > + DEBUG(stderr, "%s: r_pio=%08x e_physbase=%08x " > + "r_virtbase=%08lx value=%08x\n", > + __func__, r_pio, (int)r_access->e_physbase, > + (unsigned long)r_access->r_virtbase, value); > + return value; > +} > + > +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int > region_num, + uint32_t e_phys, > uint32_t e_size, int type) +{ > + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; > + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; > + int first_map = (region->e_size == 0); > + int ret = 0; > + > + DEBUG("%s: e_phys=%08x r_virt=%x type=%d len=%08x region_num=%d > \n", + __func__, e_phys, (uint32_t)region->r_virtbase, type, > e_size, + region_num); > + > + region->e_physbase = e_phys; > + region->e_size = e_size; > + > + /* FIXME: Add support for emulated MMIO for non-kvm guests */ > + if (kvm_enabled()) { > + if (!first_map) > + kvm_destroy_phys_mem(kvm_context, e_phys, e_size); Is this a typo? Don't we need to destroy the originally registered address here?
> + if (e_size > 0) > + ret = kvm_register_phys_mem(kvm_context, e_phys, > + region->r_virtbase, e_size, > 0); + if (ret != 0) > + fprintf(stderr, "%s: Error: create new mapping > failed\n", __func__); + } > +} > + > +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int > region_num, + uint32_t addr, > uint32_t size, int type) +{ > + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; > + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; > + int r; > + > + region->e_physbase = addr; > + region->e_size = size; > + > + DEBUG("%s: e_phys=0x%x r_virt=%x type=0x%x len=%d region_num=%d > \n", + __func__, addr, (uint32_t)region->r_virtbase, type, > size, region_num); + > + r = ioperm((uint32_t)region->r_virtbase, size, 1); > + if (r < 0) { > + perror("assigned_dev_ioport_map: ioperm"); > + return; > + } > + > + register_ioport_read(addr, size, 1, assigned_dev_ioport_readb, > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_read(addr, size, 2, assigned_dev_ioport_readw, > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_read(addr, size, 4, assigned_dev_ioport_readl, > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_write(addr, size, 1, assigned_dev_ioport_writeb, > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_write(addr, size, 2, assigned_dev_ioport_writew, > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_write(addr, size, 4, assigned_dev_ioport_writel, > + (void *) (r_dev->v_addrs + region_num)); > +} > + > +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t > address, + uint32_t val, int > len) +{ > + int fd, r; > + > + DEBUG("%s: (%x.%x): address=%04x val=0x%08x len=%d\n", > + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > + (uint16_t) address, val, len); > + > + if (address == 0x4) { > + pci_default_write_config(d, address, val, len); > + /* Continue to program the card */ > + } > + > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > + 
address == 0x3c || address == 0x3d) { > + /* used for update-mappings (BAR emulation) */ > + pci_default_write_config(d, address, val, len); > + return; > + } > + DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", > + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), > + (uint16_t) address, val, len); > + fd = ((AssignedDevice *)d)->real_device.config_fd; > + r = lseek(fd, address, SEEK_SET); > + if (r < 0) { > + fprintf(stderr, "%s: bad seek, errno = %d\n", __func__, > errno); + return; > + } > +again: > + r = write(fd, &val, len); > + if (r < 0) { > + if (errno == EINTR || errno == EAGAIN) > + goto again; > + fprintf(stderr, "%s: write failed, errno = %d\n", __func__, > errno); + } > +} > + > +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t > address, + int len) > +{ > + uint32_t val = 0; > + int fd, r; > + > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > + address == 0x3c || address == 0x3d) { > + val = pci_default_read_config(d, address, len); > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, > val, len); + return val; > + } > + > + /* vga specific, remove later */ > + if (address == 0xFC) > + goto do_log; > + > + fd = ((AssignedDevice *)d)->real_device.config_fd; > + r = lseek(fd, address, SEEK_SET); > + if (r < 0) { > + fprintf(stderr, "%s: bad seek, errno = %d\n", __func__, > errno); + return val; > + } > +again: > + r = read(fd, &val, len); > + if (r < 0) { > + if (errno == EINTR || errno == EAGAIN) > + goto again; > + fprintf(stderr, "%s: read failed, errno = %d\n", > + __func__, errno); > + } > +do_log: > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, > len); + > + /* kill the special capabilities */ > + if (address == 4 && len == 4) > + val &= ~0x100000; > + else if (address == 6) > + val &= ~0x10; > + > + return val; > +} > + > +static int 
assigned_dev_register_regions(PCIRegion *io_regions, > + unsigned long regions_num, > + AssignedDevice *pci_dev) > +{ > + uint32_t i; > + PCIRegion *cur_region = io_regions; > + > + for (i = 0; i < regions_num; i++, cur_region++) { > + if (!cur_region->valid) > + continue; > + pci_dev->v_addrs[i].num = i; > + > + /* handle memory io regions */ > + if (cur_region->type & IORESOURCE_MEM) { > + int t = cur_region->type & IORESOURCE_PREFETCH > + ? PCI_ADDRESS_SPACE_MEM_PREFETCH > + : PCI_ADDRESS_SPACE_MEM; > + > + /* map physical memory */ > + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; > + pci_dev->v_addrs[i].r_virtbase = > + mmap(NULL, > + (cur_region->size + 0xFFF) & 0xFFFFF000, > + PROT_WRITE | PROT_READ, MAP_SHARED, > + cur_region->resource_fd, (off_t) 0); > + > + if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) { > + fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!" > + "\n", __func__, > + (uint32_t) (cur_region->base_addr)); > + return -1; > + } > + pci_dev->v_addrs[i].r_size = cur_region->size; > + pci_dev->v_addrs[i].e_size = 0; > + > + /* add offset */ > + pci_dev->v_addrs[i].r_virtbase += > + (cur_region->base_addr & 0xFFF); > + > + pci_register_io_region((PCIDevice *) pci_dev, i, > + cur_region->size, t, > + assigned_dev_iomem_map); > + continue; > + } > + /* handle port io regions */ > + pci_register_io_region((PCIDevice *) pci_dev, i, > + cur_region->size, > PCI_ADDRESS_SPACE_IO, + > assigned_dev_ioport_map); + > + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; > + pci_dev->v_addrs[i].r_virtbase = > + (void *)(long)cur_region->base_addr; > + /* not relevant for port io */ > + pci_dev->v_addrs[i].memory_index = 0; > + } > + > + /* success */ > + return 0; > +} > + > +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, > + uint8_t r_dev, uint8_t r_func) > +{ > + char dir[128], name[128], comp[16]; > + int fd, r = 0; > + FILE *f; > + unsigned long long start, end, size, flags; > + PCIRegion *rp; > + PCIDevRegions *dev = 
&pci_dev->real_device; > + > + dev->region_number = 0; > + > + snprintf(dir, 128, "/sys/bus/pci/devices/0000:%02x:%02x.%x/", > + r_bus, r_dev, r_func); > + strncpy(name, dir, 128); > + strncat(name, "config", 6); > + fd = open(name, O_RDWR); > + if (fd == -1) { > + fprintf(stderr, "%s: %s: %m\n", __func__, name); > + return 1; > + } > + dev->config_fd = fd; > +again: > + r = read(fd, pci_dev->dev.config, sizeof(pci_dev->dev.config)); > + if (r < 0) { > + if (errno == EINTR || errno == EAGAIN) > + goto again; > + fprintf(stderr, "%s: read failed, errno = %d\n", __func__, > errno); + } > + strncpy(name, dir, 128); > + strncat(name, "resource", 8); > + > + f = fopen(name, "r"); > + if (f == NULL) { > + fprintf(stderr, "%s: %s: %m\n", __func__, name); > + return 1; > + } > + r = -1; > + while (fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3) > { + r++; > + rp = dev->regions + r; > + rp->valid = 0; > + size = end - start + 1; > + flags &= IORESOURCE_IO | IORESOURCE_MEM | > IORESOURCE_PREFETCH; + if (size == 0 || (flags & > ~IORESOURCE_PREFETCH) == 0) + continue; > + if (flags & IORESOURCE_MEM) { > + flags &= ~IORESOURCE_IO; > + snprintf(comp, 16, "resource%d", r); > + strncpy(name, dir, 128); > + strncat(name, comp, 16); > + fd = open(name, O_RDWR); > + if (fd == -1) > + continue; /* probably ROM */ > + rp->resource_fd = fd; > + } else > + flags &= ~IORESOURCE_PREFETCH; > + > + rp->type = flags; > + rp->valid = 1; > + rp->base_addr = start; > + rp->size = size; > + DEBUG("%s: region %d size %d start 0x%x type %d resource_fd > %d\n", + __func__, r, rp->size, start, rp->type, > rp->resource_fd); + } > + fclose(f); > + > + dev->region_number = r; > + return 0; > +} > + > +static int disable_iommu; > +int nr_assigned_devices; > +static LIST_HEAD(, AssignedDevInfo) adev_head; > + > +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) > +{ > + return (uint32_t)bus << 8 | (uint32_t)devfn; > +} > + > +static AssignedDevice *register_real_device(PCIBus 
*e_bus, > + const char *e_dev_name, > + int e_devfn, uint8_t > r_bus, + uint8_t r_dev, > uint8_t r_func) +{ > + int r; > + AssignedDevice *pci_dev; > + uint8_t e_device, e_intx; > + > + DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n", > + __func__, e_dev_name, e_devfn); > + > + pci_dev = (AssignedDevice *) > + pci_register_device(e_bus, e_dev_name, > sizeof(AssignedDevice), + e_devfn, > assigned_dev_pci_read_config, + > assigned_dev_pci_write_config); + if (NULL == pci_dev) { > + fprintf(stderr, "%s: Error: Couldn't register real device > %s\n", + __func__, e_dev_name); > + return NULL; > + } > + if (get_real_device(pci_dev, r_bus, r_dev, r_func)) { > + fprintf(stderr, "%s: Error: Couldn't get real device > (%s)!\n", + __func__, e_dev_name); > + goto out; > + } > + > + /* handle real device's MMIO/PIO BARs */ > + if (assigned_dev_register_regions(pci_dev->real_device.regions, > + > pci_dev->real_device.region_number, + > pci_dev)) + goto out; > + > + /* handle interrupt routing */ > + e_device = (pci_dev->dev.devfn >> 3) & 0x1f; > + e_intx = pci_dev->dev.config[0x3d] - 1; > + pci_dev->intpin = e_intx; > + pci_dev->run = 0; > + pci_dev->girq = 0; > + pci_dev->h_busnr = r_bus; > + pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func); > + > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > + if (kvm_enabled()) { > + struct kvm_assigned_pci_dev assigned_dev_data; > + > + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); > + assigned_dev_data.assigned_dev_id = > + calc_assigned_dev_id(pci_dev->h_busnr, > + (uint32_t)pci_dev->h_devfn); > + assigned_dev_data.busnr = pci_dev->h_busnr; > + assigned_dev_data.devfn = pci_dev->h_devfn; > + > +#ifdef KVM_CAP_IOMMU > + /* We always enable the IOMMU if present > + * (or when not disabled on the command line) > + */ > + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); > + if (r && !disable_iommu) > + assigned_dev_data.flags |= KVM_DEV_ASSIGN_ENABLE_IOMMU; > +#endif > + r = kvm_assign_pci_device(kvm_context, 
&assigned_dev_data); > + if (r < 0) { > + fprintf(stderr, "Could not notify kernel about " > + "assigned device \"%s\"\n", e_dev_name); > + perror("register_real_device"); > + goto out; > + } > + } > +#endif > + term_printf("Registered host PCI device %02x:%02x.%1x " > + "(\"%s\") as guest device %02x:%02x.%1x\n", > + r_bus, r_dev, r_func, e_dev_name, > + pci_bus_num(e_bus), e_device, r_func); > + > + return pci_dev; > +out: > +/* pci_unregister_device(&pci_dev->dev); */ > + return NULL; > +} > + > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > +/* The pci config space got updated. Check if irq numbers have > changed + * for our devices > + */ > +void assigned_dev_update_irq(PCIDevice *d) > +{ > + int irq, r; > + AssignedDevice *assigned_dev; > + AssignedDevInfo *adev; > + > + LIST_FOREACH(adev, &adev_head, next) { > + assigned_dev = adev->assigned_dev; > + irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin); > + irq = piix_get_irq(irq); > + > + if (irq != assigned_dev->girq) { > + struct kvm_assigned_irq assigned_irq_data; > + > + memset(&assigned_irq_data, 0, sizeof(assigned_irq_data)); > + assigned_irq_data.assigned_dev_id = > + calc_assigned_dev_id(assigned_dev->h_busnr, > + (uint8_t) > assigned_dev->h_devfn); + assigned_irq_data.guest_irq = > irq; + assigned_irq_data.host_irq = > assigned_dev->real_device.irq; + r = > kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r < > 0) { + perror("assigned_dev_update_irq"); > + fprintf(stderr, "Are you assigning a device " > + "that shares IRQ with some other device?\n"); > + pci_unregister_device(&assigned_dev->dev); > + /* FIXME: Delete node from list */ > + continue; > + } > + assigned_dev->girq = irq; > + } > + } > +} > +#endif > + > +struct PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus > *bus) +{ > + adev->assigned_dev = register_real_device(bus, > + adev->name, -1, > + adev->bus, > + adev->dev, > + adev->func); > + return &adev->assigned_dev->dev; > +} > + > +int init_all_assigned_devices(PCIBus 
*bus) > +{ > + struct AssignedDevInfo *adev; > + > + LIST_FOREACH(adev, &adev_head, next) > + if (init_assigned_device(adev, bus) == NULL) > + return -1; > + return 0; > +} > + > +/* > + * Syntax to assign device: > + * > + * -pcidevice dev=bus:dev.func,dma=dma > + * > + * Example: > + * -pcidevice host=00:13.0,dma=pvdma > + * > + * dma can currently only be 'none' to disable iommu support. > + */ > +AssignedDevInfo *add_assigned_device(const char *arg) > +{ > + char *cp, *cp1; > + char device[8]; > + char dma[6]; > + int r; > + AssignedDevInfo *adev; > + > + adev = qemu_mallocz(sizeof(AssignedDevInfo)); > + if (adev == NULL) { > + fprintf(stderr, "%s: Out of memory\n", __func__); > + return NULL; > + } > + r = get_param_value(device, sizeof(device), "host", arg); > + r = get_param_value(adev->name, sizeof(adev->name), "name", arg); > + if (!r) > + strncpy(adev->name, device, 8); > + > +#ifdef KVM_CAP_IOMMU > + r = get_param_value(dma, sizeof(dma), "dma", arg); > + if (r && !strncmp(dma, "none", 4)) > + disable_iommu = 1; > +#endif > + cp = device; > + adev->bus = strtoul(cp, &cp1, 16); > + if (*cp1 != ':') > + goto bad; > + cp = cp1 + 1; > + > + adev->dev = strtoul(cp, &cp1, 16); > + if (*cp1 != '.') > + goto bad; > + cp = cp1 + 1; > + > + adev->func = strtoul(cp, &cp1, 16); > + > + nr_assigned_devices++; > + LIST_INSERT_HEAD(&adev_head, adev, next); > + return adev; > +bad: > + fprintf(stderr, "pcidevice argument parse error; " > + "please check the help text for usage\n"); > + qemu_free(adev); > + return NULL; > +} > diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h > new file mode 100644 > index 0000000..e4148df > --- /dev/null > +++ b/qemu/hw/device-assignment.h > @@ -0,0 +1,98 @@ > +/* > + * Copyright (c) 2007, Neocleus Corporation. > + * Copyright (c) 2007, Intel Corporation. 
> + * > + * This program is free software; you can redistribute it and/or > modify it + * under the terms and conditions of the GNU General > Public License, + * version 2, as published by the Free Software > Foundation. + * > + * This program is distributed in the hope it will be useful, but > WITHOUT + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > General Public License for + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with + * this program; if not, write to the Free Software > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA > 02111-1307 USA. + * > + * Data structures for storing PCI state > + * > + * Adapted to kvm by Qumranet > + * > + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) > + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Red Hat, Amit Shah ([EMAIL PROTECTED]) > + */ > + > +#ifndef __DEVICE_ASSIGNMENT_H__ > +#define __DEVICE_ASSIGNMENT_H__ > + > +#include <sys/mman.h> > +#include "qemu-common.h" > +#include "sys-queue.h" > +#include "pci.h" > + > +/* From include/linux/pci.h in the kernel sources */ > +#define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & > 0x07)) + > +#define MAX_IO_REGIONS (6) > + > +typedef struct { > + int type; /* Memory or port I/O */ > + int valid; > + uint32_t base_addr; > + uint32_t size; /* size of the region */ > + int resource_fd; > +} PCIRegion; > + > +typedef struct { > + uint8_t bus, dev, func; /* Bus inside domain, device and > function */ + int irq; /* IRQ number */ > + uint16_t region_number; /* number of active regions */ > + > + /* Port I/O or MMIO Regions */ > + PCIRegion regions[MAX_IO_REGIONS]; > + int config_fd; > +} PCIDevRegions; > + > +typedef struct { > + target_phys_addr_t e_physbase; > + uint32_t memory_index; > + void 
*r_virtbase; /* mmapped access address */ > + int num; /* our index within v_addrs[] */ > + uint32_t e_size; /* emulated size of region in bytes */ > + uint32_t r_size; /* real size of region in bytes */ > +} AssignedDevRegion; > + > +typedef struct { > + PCIDevice dev; > + int intpin; > + uint8_t debug_flags; > + AssignedDevRegion v_addrs[PCI_NUM_REGIONS]; > + PCIDevRegions real_device; > + int run; > + int girq; > + unsigned char h_busnr; > + unsigned int h_devfn; > + int bound; > +} AssignedDevice; > + > +typedef struct AssignedDevInfo AssignedDevInfo; > + > +struct AssignedDevInfo { > + char name[15]; > + int bus; > + int dev; > + int func; > + AssignedDevice *assigned_dev; > + LIST_ENTRY(AssignedDevInfo) next; > +}; > + > +PCIDevice *init_assigned_device(AssignedDevInfo *adev, PCIBus *bus); > +int init_all_assigned_devices(PCIBus *bus); > +AssignedDevInfo *add_assigned_device(const char *arg); > +void assigned_dev_set_vector(int irq, int vector); > +void assigned_dev_ack_mirq(int vector); > + > +#endif /* __DEVICE_ASSIGNMENT_H__ */ > diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c > index d559f0c..e0438ed 100644 > --- a/qemu/hw/pc.c > +++ b/qemu/hw/pc.c > @@ -33,6 +33,7 @@ > #include "boards.h" > #include "console.h" > #include "fw_cfg.h" > +#include "device-assignment.h" > > #include "qemu-kvm.h" > > @@ -993,6 +994,11 @@ static void pc_init1(ram_addr_t ram_size, int > vga_ram_size, } > } > > + /* Initialize assigned devices */ > + if (pci_enabled) > + if(init_all_assigned_devices(pci_bus)) > + exit(1); > + > rtc_state = rtc_init(0x70, i8259[8]); > > qemu_register_boot_set(pc_boot_set, rtc_state); > diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c > index c82cd20..f86a8a7 100644 > --- a/qemu/hw/pci.c > +++ b/qemu/hw/pci.c > @@ -50,6 +50,7 @@ struct PCIBus { > > static void pci_update_mappings(PCIDevice *d); > static void pci_set_irq(void *opaque, int irq_num, int level); > +void assigned_dev_update_irq(PCIDevice *d); > > target_phys_addr_t pci_mem_base; > static int 
pci_irq_index; > @@ -453,6 +454,12 @@ void pci_default_write_config(PCIDevice *d, > val >>= 8; > } > > +#ifdef KVM_CAP_DEVICE_ASSIGNMENT > + if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() && > + address >= 0x60 && address <= 0x63) > + assigned_dev_update_irq(d); > +#endif > + > end = address + len; > if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) { > /* if the command register is modified, we must modify the > mappings */ > diff --git a/qemu/vl.c b/qemu/vl.c > index 388e79d..5a39d12 100644 > --- a/qemu/vl.c > +++ b/qemu/vl.c > @@ -38,6 +38,7 @@ > #include "qemu-char.h" > #include "block.h" > #include "audio/audio.h" > +#include "hw/device-assignment.h" > #include "migration.h" > #include "balloon.h" > #include "qemu-kvm.h" > @@ -8692,6 +8693,12 @@ static void help(int exitcode) > #endif > "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n" > "-no-kvm-pit disable KVM kernel mode PIT\n" > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || > defined(__linux__) + "-pcidevice > host=bus:dev.func[,dma=none][,name=\"string\"]\n" + " > expose a PCI device to the guest OS.\n" + " > dma=none: don't perform any dma translations (default is to use an > iommu)\n" + " 'string' is used in log > output.\n" +#endif #endif > #ifdef TARGET_I386 > "-no-acpi disable ACPI\n" > @@ -8811,6 +8818,9 @@ enum { > QEMU_OPTION_no_kvm, > QEMU_OPTION_no_kvm_irqchip, > QEMU_OPTION_no_kvm_pit, > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || > defined(__linux__) + QEMU_OPTION_pcidevice, > +#endif > QEMU_OPTION_no_reboot, > QEMU_OPTION_no_shutdown, > QEMU_OPTION_show_cursor, > @@ -8900,6 +8910,9 @@ static const QEMUOption qemu_options[] = { > #endif > { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip }, > { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit }, > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || > defined(__linux__) + { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice > }, +#endif > #endif > #if defined(TARGET_PPC) || defined(TARGET_SPARC) > { "g", 1, 
QEMU_OPTION_g }, > @@ -9844,6 +9857,11 @@ int main(int argc, char **argv) > kvm_pit = 0; > break; > } > +#if defined(TARGET_I386) || defined(TARGET_X86_64) || > defined(__linux__) + case QEMU_OPTION_pcidevice: > + add_assigned_device(optarg); > + break; > +#endif > #endif > case QEMU_OPTION_usb: > usb_enabled = 1; > -- > 1.6.0.2
Best Regards, Disheng, Su -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html