* On Wednesday 17 Sep 2008 11:15:16 Zhang, Xiantao wrote: > Seems it lacks device-assignment.[c,h] ? > Xiantao
Hmm, here is the version with the files. From cd82862ef7493afd3431e538b85adb9771f94da6 Mon Sep 17 00:00:00 2001 From: Amit Shah <[EMAIL PROTECTED]> Date: Tue, 16 Sep 2008 23:09:23 +0530 Subject: [PATCH] KVM/userspace: Support for assigning PCI devices to guests [This still doesn't include some fixes to review comments. I'm posting this just so that people can use this to test or base their work off the latest patch.] From: Or Sagi <[EMAIL PROTECTED]> From: Nir Peleg <[EMAIL PROTECTED]> From: Amit Shah <[EMAIL PROTECTED]> From: Ben-Ami Yassour <[EMAIL PROTECTED]> From: Glauber de Oliveira Costa <[EMAIL PROTECTED]> With this patch, we can assign a device on the host machine to a guest. A new command-line option, -pcidevice, is added. For example, to invoke it for a device sitting at PCI bus:dev.fn 04:08.0 with host IRQ 18, use this: -pcidevice host=04:08.0 The host driver for the device, if any, must be removed before assigning the device. This works only with the in-kernel irqchip method; to use the userspace irqchip, a kernel module (irqhook) and some extra changes are needed. 
Signed-off-by: Amit Shah <[EMAIL PROTECTED]> --- libkvm/libkvm-x86.c | 14 + libkvm/libkvm.h | 27 ++ qemu/Makefile.target | 1 + qemu/hw/device-assignment.c | 605 +++++++++++++++++++++++++++++++++++++++++++ qemu/hw/device-assignment.h | 92 +++++++ qemu/hw/isa.h | 2 + qemu/hw/pc.c | 9 + qemu/hw/pci.c | 12 + qemu/hw/pci.h | 1 + qemu/hw/piix_pci.c | 19 ++ qemu/qemu-kvm-x86.c | 3 + qemu/vl.c | 18 ++ 12 files changed, 803 insertions(+), 0 deletions(-) create mode 100644 qemu/hw/device-assignment.c create mode 100644 qemu/hw/device-assignment.h diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c index a8cca15..6157f75 100644 --- a/libkvm/libkvm-x86.c +++ b/libkvm/libkvm-x86.c @@ -53,6 +53,20 @@ static int kvm_init_tss(kvm_context_t kvm) return 0; } +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev) +{ + return ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_DEVICE, assigned_dev); +} + +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq) +{ + return ioctl(kvm->vm_fd, KVM_ASSIGN_IRQ, assigned_irq); +} +#endif + int kvm_create_pit(kvm_context_t kvm) { #ifdef KVM_CAP_PIT diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h index 79dd769..edf8e9e 100644 --- a/libkvm/libkvm.h +++ b/libkvm/libkvm.h @@ -658,4 +658,31 @@ int kvm_s390_interrupt(kvm_context_t kvm, int slot, int kvm_s390_set_initial_psw(kvm_context_t kvm, int slot, psw_t psw); int kvm_s390_store_status(kvm_context_t kvm, int slot, unsigned long addr); #endif + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +/*! + * \brief Notifies host kernel aboud a PCI device assigned to guest + * + * Used for PCI device assignment, this function notifies the host + * kernel about the assigning of the physical PCI device. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_dev Parameters, like bus, devfn number, etc + */ +int kvm_assign_pci_device(kvm_context_t kvm, + struct kvm_assigned_pci_dev *assigned_dev); + +/*! 
+ * \brief Notifies host kernel about changes to a irq assignment + * + * Used for PCI device assignment, this function notifies the host + * kernel about the assigning of the irq for an assigned physical + * PCI device. + * + * \param kvm Pointer to the current kvm_context + * \param assigned_irq Parameters, like dev id, host irq, guest irq, etc + */ +int kvm_assign_irq(kvm_context_t kvm, + struct kvm_assigned_irq *assigned_irq); +#endif #endif diff --git a/qemu/Makefile.target b/qemu/Makefile.target index 89814fd..958c33b 100644 --- a/qemu/Makefile.target +++ b/qemu/Makefile.target @@ -611,6 +611,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o +OBJS+= device-assignment.o ifeq ($(USE_KVM_PIT), 1) OBJS+= i8254-kvm.o endif diff --git a/qemu/hw/device-assignment.c b/qemu/hw/device-assignment.c new file mode 100644 index 0000000..d32bbb4 --- /dev/null +++ b/qemu/hw/device-assignment.c @@ -0,0 +1,605 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. + * + * + * Assign a PCI device from the host to a guest VM. + * + * Adapted for KVM by Qumranet. 
+ * + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) + */ +#include <stdio.h> +#include <sys/io.h> +#include "qemu-kvm.h" +#include <linux/kvm_para.h> +#include "device-assignment.h" + +/* From linux/ioport.h */ +#define IORESOURCE_IO 0x00000100 /* Resource type */ +#define IORESOURCE_MEM 0x00000200 +#define IORESOURCE_IRQ 0x00000400 +#define IORESOURCE_DMA 0x00000800 +#define IORESOURCE_PREFETCH 0x00001000 /* No side effects */ + +/* #define DEVICE_ASSIGNMENT_DEBUG */ + +#ifdef DEVICE_ASSIGNMENT_DEBUG +#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __func__ , ## args) +#else +#define DEBUG(fmt, args...) +#endif + +#define assigned_dev_ioport_write(suffix) \ + static void assigned_dev_ioport_write##suffix(void *opaque, uint32_t addr, \ + uint32_t value) \ + { \ + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; \ + uint32_t r_pio = (unsigned long)r_access->r_virtbase \ + + (addr - r_access->e_physbase); \ + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { \ + fprintf(stderr, "assigned_dev_ioport_write" #suffix \ + ": r_pio=%08x e_physbase=%08x" \ + " r_virtbase=%08lx value=%08x\n", \ + r_pio, (int)r_access->e_physbase, \ + (unsigned long)r_access->r_virtbase, value); \ + } \ + iopl(3); \ + out##suffix(value, r_pio); \ + } + +assigned_dev_ioport_write(b) +assigned_dev_ioport_write(w) +assigned_dev_ioport_write(l) + +#define assigned_dev_ioport_read(suffix) \ + static uint32_t assigned_dev_ioport_read##suffix(void *opaque, uint32_t addr) \ + { \ + AssignedDevRegion *r_access = (AssignedDevRegion *)opaque; \ + uint32_t r_pio = (addr - r_access->e_physbase) \ + + (unsigned long)r_access->r_virtbase; \ + uint32_t value = in##suffix(r_pio); \ + if (r_access->debug & DEVICE_ASSIGNMENT_DEBUG_PIO) { \ + fprintf(stderr, "assigned_dev_ioport_read" #suffix \ + ": r_pio=%08x e_physbase=%08x r_virtbase=%08lx " \ + 
"value=%08x\n", \ + r_pio, (int)r_access->e_physbase, \ + (unsigned long)r_access->r_virtbase, value); \ + } \ + return value; \ + } + +assigned_dev_ioport_read(b) +assigned_dev_ioport_read(w) +assigned_dev_ioport_read(l) + +static void assigned_dev_iomem_map(PCIDevice *pci_dev, int region_num, + uint32_t e_phys, uint32_t e_size, int type) +{ + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; + AssignedDevRegion *region = &r_dev->v_addrs[region_num]; + int first_map = (region->e_size == 0); + int ret = 0; + + DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n", + e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size, + region_num); + + region->e_physbase = e_phys; + region->e_size = e_size; + + if (!first_map) + kvm_destroy_phys_mem(kvm_context, e_phys, e_size); + if (e_size > 0) + ret = kvm_register_phys_mem(kvm_context, + e_phys, + region->r_virtbase, + e_size, + 0); + if (ret != 0) + fprintf(stderr, "%s: Error: create new mapping failed\n", + __func__); +} + +static void assigned_dev_ioport_map(PCIDevice *pci_dev, int region_num, + uint32_t addr, uint32_t size, int type) +{ + AssignedDevice *r_dev = (AssignedDevice *) pci_dev; + int i; + uint32_t ((*rf[])(void *, uint32_t)) = + { assigned_dev_ioport_readb, + assigned_dev_ioport_readw, + assigned_dev_ioport_readl + }; + void ((*wf[])(void *, uint32_t, uint32_t)) = + { assigned_dev_ioport_writeb, + assigned_dev_ioport_writew, + assigned_dev_ioport_writel + }; + + r_dev->v_addrs[region_num].e_physbase = addr; + DEBUG("%s: address=0x%x type=0x%x len=%d region_num=%d \n", + __func__, addr, type, size, region_num); + + for (i = 0; i < 3; i++) { + register_ioport_write(addr, size, 1<<i, wf[i], + (void *) (r_dev->v_addrs + region_num)); + register_ioport_read(addr, size, 1<<i, rf[i], + (void *) (r_dev->v_addrs + region_num)); + } +} + +static void assigned_dev_pci_write_config(PCIDevice *d, uint32_t address, + uint32_t val, int len) +{ + int fd, r; + + DEBUG("%s: (%x.%x): address=%04x val=0x%08x 
len=%d\n", + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + (uint16_t) address, val, len); + + if (address == 0x4) + pci_default_write_config(d, address, val, len); + + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || + address == 0x3c || address == 0x3d) { + /* used for update-mappings (BAR emulation) */ + pci_default_write_config(d, address, val, len); + return; + } + DEBUG("%s: NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", + __func__, ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), + (uint16_t) address, val, len); + fd = ((AssignedDevice *)d)->real_device.config_fd; + lseek(fd, address, SEEK_SET); +again: + r = write(fd, &val, len); + if (r < 0) { + if (errno == EINTR || errno == EAGAIN) + goto again; + fprintf(stderr, "%s: write failed, errno = %d\n", + __func__, errno); + } +} + +static uint32_t assigned_dev_pci_read_config(PCIDevice *d, uint32_t address, + int len) +{ + uint32_t val = 0; + int fd, r; + + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || + address == 0x3c || address == 0x3d) { + val = pci_default_read_config(d, address, len); + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, + len); + return val; + } + + /* vga specific, remove later */ + if (address == 0xFC) + goto do_log; + + fd = ((AssignedDevice *)d)->real_device.config_fd; + lseek(fd, address, SEEK_SET); +again: + r = read(fd, &val, len); + if (r < 0) { + if (errno == EINTR || errno == EAGAIN) + goto again; + fprintf(stderr, "%s: read failed, errno = %d\n", + __func__, errno); + } +do_log: + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); + + /* kill the special capabilities */ + if (address == 4 && len == 4) + val &= ~0x100000; + else if (address == 6) + val &= ~0x10; + + return val; +} + +static int assigned_dev_register_regions(PCIRegion *io_regions, + unsigned long regions_num, + AssignedDevice *pci_dev) +{ + uint32_t i; 
+ PCIRegion *cur_region = io_regions; + + for (i = 0; i < regions_num; i++, cur_region++) { + if (!cur_region->valid) + continue; +#ifdef DEVICE_ASSIGNMENT_DEBUG + pci_dev->v_addrs[i].debug |= DEVICE_ASSIGNMENT_DEBUG_MMIO + | DEVICE_ASSIGNMENT_DEBUG_PIO; +#endif + pci_dev->v_addrs[i].num = i; + + /* handle memory io regions */ + if (cur_region->type & IORESOURCE_MEM) { + int t = cur_region->type & IORESOURCE_PREFETCH + ? PCI_ADDRESS_SPACE_MEM_PREFETCH + : PCI_ADDRESS_SPACE_MEM; + + /* map physical memory */ + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + pci_dev->v_addrs[i].r_virtbase = + mmap(NULL, + (cur_region->size + 0xFFF) & 0xFFFFF000, + PROT_WRITE | PROT_READ, MAP_SHARED, + cur_region->resource_fd, (off_t) 0); + + if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) { + fprintf(stderr, "%s: Error: Couldn't mmap 0x%x!" + "\n", __func__, + (uint32_t) (cur_region->base_addr)); + return -1; + } + pci_dev->v_addrs[i].r_size = cur_region->size; + pci_dev->v_addrs[i].e_size = 0; + + /* add offset */ + pci_dev->v_addrs[i].r_virtbase += + (cur_region->base_addr & 0xFFF); + + pci_register_io_region((PCIDevice *) pci_dev, i, + cur_region->size, t, + assigned_dev_iomem_map); + continue; + } + /* handle port io regions */ + pci_register_io_region((PCIDevice *) pci_dev, i, + cur_region->size, PCI_ADDRESS_SPACE_IO, + assigned_dev_ioport_map); + + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; + pci_dev->v_addrs[i].r_virtbase = + (void *)(long)cur_region->base_addr; + /* not relevant for port io */ + pci_dev->v_addrs[i].memory_index = 0; + } + + /* success */ + return 0; +} + +static int get_real_device(AssignedDevice *pci_dev, uint8_t r_bus, + uint8_t r_dev, uint8_t r_func) +{ + char dir[128], name[128], comp[16]; + int fd, r = 0; + FILE *f; + unsigned long long start, end, size, flags; + PCIRegion *rp; + PCIDevRegions *dev = &pci_dev->real_device; + + dev->region_number = 0; + + sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/", + r_bus, r_dev, 
r_func); + strcpy(name, dir); + strcat(name, "config"); + fd = open(name, O_RDWR); + if (fd == -1) { + fprintf(stderr, "%s: %s: %m\n", __func__, name); + return 1; + } + dev->config_fd = fd; +again: + r = read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config); + if (r < 0) { + if (errno == EINTR || errno == EAGAIN) + goto again; + fprintf(stderr, "%s: read failed, errno = %d\n", + __func__, errno); + } + strcpy(name, dir); + strcat(name, "resource"); + + f = fopen(name, "r"); + if (f == NULL) { + fprintf(stderr, "%s: %s: %m\n", __func__, name); + return 1; + } + for (r = 0; fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == 3; + r++) { + rp = dev->regions + r; + rp->valid = 0; + size = end - start + 1; + flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; + if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) + continue; + if (flags & IORESOURCE_MEM) { + flags &= ~IORESOURCE_IO; + sprintf(comp, "resource%d", r); + strcpy(name, dir); + strcat(name, comp); + fd = open(name, O_RDWR); + if (fd == -1) + continue; /* probably ROM */ + rp->resource_fd = fd; + } else + flags &= ~IORESOURCE_PREFETCH; + + rp->type = flags; + rp->valid = 1; + rp->base_addr = start; + rp->size = size; + DEBUG("%s: region %d size %d start 0x%x type %d " + "resource_fd %d\n", __func__, r, rp->size, start, + rp->type, rp->resource_fd); + } + fclose(f); + + dev->region_number = r; + return 0; +} + +#define MAX_ASSIGNED_DEVS 4 +struct { + char name[15]; + int bus; + int dev; + int func; + int dma; + AssignedDevice *assigned_dev; +} assigned_devices[MAX_ASSIGNED_DEVS]; + +int nr_assigned_devices; +static int disable_iommu; + +static uint32_t calc_assigned_dev_id(uint8_t bus, uint8_t devfn) +{ + return (uint32_t)bus << 8 | (uint32_t)devfn; +} + +static AssignedDevice *register_real_device(PCIBus *e_bus, + const char *e_dev_name, + int e_devfn, uint8_t r_bus, + uint8_t r_dev, uint8_t r_func, + int flags) +{ + int r; + AssignedDevice *pci_dev; + uint8_t e_device, e_intx; + + 
DEBUG("%s: Registering real physical device %s (devfn=0x%x)\n", + __func__, e_dev_name, e_devfn); + + pci_dev = (AssignedDevice *) + pci_register_device(e_bus, e_dev_name, sizeof(AssignedDevice), + e_devfn, assigned_dev_pci_read_config, + assigned_dev_pci_write_config); + if (NULL == pci_dev) { + fprintf(stderr, "%s: Error: Couldn't register real device %s\n", + __func__, e_dev_name); + return NULL; + } + if (get_real_device(pci_dev, r_bus, r_dev, r_func)) { + fprintf(stderr, "%s: Error: Couldn't get real device (%s)!\n", + __func__, e_dev_name); + goto out; + } + + /* handle real device's MMIO/PIO BARs */ + if (assigned_dev_register_regions(pci_dev->real_device.regions, + pci_dev->real_device.region_number, + pci_dev)) + goto out; + + /* handle interrupt routing */ + e_device = (pci_dev->dev.devfn >> 3) & 0x1f; + e_intx = pci_dev->dev.config[0x3d] - 1; + pci_dev->intpin = e_intx; + pci_dev->run = 0; + pci_dev->girq = 0; + pci_dev->h_busnr = r_bus; + pci_dev->h_devfn = PCI_DEVFN(r_dev, r_func); + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + if (kvm_enabled()) { + struct kvm_assigned_pci_dev assigned_dev_data; + + memset(&assigned_dev_data, 0, sizeof(assigned_dev_data)); + assigned_dev_data.assigned_dev_id = + calc_assigned_dev_id(pci_dev->h_busnr, + (uint32_t)pci_dev->h_devfn); + assigned_dev_data.busnr = pci_dev->h_busnr; + assigned_dev_data.devfn = pci_dev->h_devfn; + assigned_dev_data.flags = flags; +#ifdef KVM_CAP_PV_DMA + assigned_dev_data.guest_dev_id = + calc_assigned_dev_id(pci_bus_num(e_bus), + PCI_DEVFN(e_device, r_func)); +#endif + +#ifdef KVM_CAP_IOMMU + /* We always enable the IOMMU if present + * (or when not disabled on the command line) + */ + r = kvm_check_extension(kvm_context, KVM_CAP_IOMMU); + if (r && !disable_iommu) + assigned_devices[nr_assigned_devices].dma |= + KVM_DEV_ASSIGN_ENABLE_IOMMU; +#endif + r = kvm_assign_pci_device(kvm_context, + &assigned_dev_data); + if (r < 0) { + fprintf(stderr, "Could not notify kernel about " + "assigned device 
\"%s\"\n", e_dev_name); + perror("pt-ioctl"); + goto out; + } + } +#endif + fprintf(stderr, "Registered host PCI device %02x:%02x.%1x " + "(\"%s\") as guest device %02x:%02x.%1x\n", + r_bus, r_dev, r_func, e_dev_name, + pci_bus_num(e_bus), e_device, r_func); + + return pci_dev; +out: + pci_unregister_device(&pci_dev->dev); + return NULL; +} + +extern int get_param_value(char *buf, int buf_size, + const char *tag, const char *str); +extern int piix_get_irq(int); + +#ifdef KVM_CAP_DEVICE_ASSIGNMENT +/* The pci config space got updated. Check if irq numbers have changed + * for our devices + */ +void assigned_dev_update_irq(PCIDevice *d) +{ + int i, irq, r; + AssignedDevice *assigned_dev; + + for (i = 0; i < nr_assigned_devices; i++) { + assigned_dev = assigned_devices[i].assigned_dev; + if (assigned_dev == NULL) + continue; + + irq = pci_map_irq(&assigned_dev->dev, assigned_dev->intpin); + irq = piix_get_irq(irq); + + if (irq != assigned_dev->girq) { + struct kvm_assigned_irq assigned_irq_data; + + memset(&assigned_irq_data, 0, sizeof assigned_irq_data); + assigned_irq_data.assigned_dev_id = + calc_assigned_dev_id(assigned_dev->h_busnr, + (uint8_t) + assigned_dev->h_devfn); + assigned_irq_data.guest_irq = irq; + assigned_irq_data.host_irq = + assigned_dev->real_device.irq; + r = kvm_assign_irq(kvm_context, &assigned_irq_data); + if (r < 0) { + perror("assigned_dev_update_irq"); + fprintf(stderr, "Are you assigning a device " + "that shares IRQ with some other " + "device?\n"); + pci_unregister_device(&assigned_dev->dev); + continue; + } + assigned_dev->girq = irq; + } + } +} +#endif + +static int init_device_assignment(void) +{ + /* Do we have any devices to be assigned? 
*/ + if (nr_assigned_devices == 0) + return -1; + iopl(3); + return 0; +} + +int init_assigned_device(PCIBus *bus, int *index) +{ + AssignedDevice *dev = NULL; + int i, ret = 0; + + if (*index == -1) { + if (init_device_assignment() < 0) + return -1; + + *index = nr_assigned_devices - 1; + } + i = *index; + dev = register_real_device(bus, assigned_devices[i].name, -1, + assigned_devices[i].bus, + assigned_devices[i].dev, + assigned_devices[i].func, + assigned_devices[i].dma); + if (dev == NULL) { + fprintf(stderr, "Error: Couldn't register device \"%s\"\n", + assigned_devices[i].name); + ret = -1; + } + assigned_devices[i].assigned_dev = dev; + + --*index; + return ret; +} + +/* + * Syntax to assign device: + * + * -pcidevice dev=bus:dev.func,dma=dma + * + * Example: + * -pcidevice dev=00:13.0,dma=pvdma + * + * dma can currently be 'none' to disable iommu support. + */ +void add_assigned_device(const char *arg) +{ + char *cp, *cp1; + char device[8]; + char dma[6]; + int r; + + if (nr_assigned_devices >= MAX_ASSIGNED_DEVS) { + fprintf(stderr, "Too many assigned devices (max %d)\n", + MAX_ASSIGNED_DEVS); + return; + } + memset(&assigned_devices[nr_assigned_devices], 0, + sizeof assigned_devices[nr_assigned_devices]); + + r = get_param_value(device, sizeof device, "host", arg); + + r = get_param_value(assigned_devices[nr_assigned_devices].name, + sizeof assigned_devices[nr_assigned_devices].name, + "name", arg); + if (!r) + strncpy(assigned_devices[nr_assigned_devices].name, device, 8); + +#ifdef KVM_CAP_IOMMU + r = get_param_value(dma, sizeof dma, "dma", arg); + if (r && !strncmp(dma, "none", 4)) + disable_iommu = 1; +#endif + cp = device; + assigned_devices[nr_assigned_devices].bus = strtoul(cp, &cp1, 16); + if (*cp1 != ':') + goto bad; + cp = cp1 + 1; + + assigned_devices[nr_assigned_devices].dev = strtoul(cp, &cp1, 16); + if (*cp1 != '.') + goto bad; + cp = cp1 + 1; + + assigned_devices[nr_assigned_devices].func = strtoul(cp, &cp1, 16); + + nr_assigned_devices++; 
+ return; +bad: + fprintf(stderr, "pcidevice argument parse error; " + "please check the help text for usage\n"); +} diff --git a/qemu/hw/device-assignment.h b/qemu/hw/device-assignment.h new file mode 100644 index 0000000..621df82 --- /dev/null +++ b/qemu/hw/device-assignment.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2007, Neocleus Corporation. + * Copyright (c) 2007, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA 02111-1307 USA. 
+ * + * Data structures for storing PCI state + * + * Adapted to kvm by Qumranet + * + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) + */ + +#ifndef __DEVICE_ASSIGNMENT_H__ +#define __DEVICE_ASSIGNMENT_H__ + +#include <sys/mman.h> +#include "qemu-common.h" +#include "pci.h" +#include <linux/types.h> + +#define DEVICE_ASSIGNMENT_DEBUG_PIO (0x01) +#define DEVICE_ASSIGNMENT_DEBUG_MMIO (0x02) + +/* From include/linux/pci.h in the kernel sources */ +#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) + +typedef uint32_t pciaddr_t; + +#define MAX_IO_REGIONS (6) + +typedef struct pci_region_s { + int type; /* Memory or port I/O */ + int valid; + pciaddr_t base_addr; + pciaddr_t size; /* size of the region */ + int resource_fd; +} PCIRegion; + +typedef struct pci_dev_s { + uint8_t bus, dev, func; /* Bus inside domain, device and function */ + int irq; /* IRQ number */ + uint16_t region_number; /* number of active regions */ + + /* Port I/O or MMIO Regions */ + PCIRegion regions[MAX_IO_REGIONS]; + int config_fd; +} PCIDevRegions; + +typedef struct assigned_dev_region_s { + target_phys_addr_t e_physbase; + uint32_t memory_index; + void *r_virtbase; /* mmapped access address */ + int num; /* our index within v_addrs[] */ + uint32_t e_size; /* emulated size of region in bytes */ + uint32_t r_size; /* real size of region in bytes */ + uint32_t debug; +} AssignedDevRegion; + +typedef struct assigned_dev_s { + PCIDevice dev; + int intpin; + uint8_t debug_flags; + AssignedDevRegion v_addrs[PCI_NUM_REGIONS]; + PCIDevRegions real_device; + int run; + int girq; + unsigned char h_busnr; + unsigned int h_devfn; + int bound; +} AssignedDevice; + +/* Initialization functions */ +int init_assigned_device(PCIBus *bus, int *index); +void add_assigned_device(const char *arg); +void assigned_dev_set_vector(int irq, int vector); 
+void assigned_dev_ack_mirq(int vector); + +#endif /* __DEVICE_ASSIGNMENT_H__ */ diff --git a/qemu/hw/isa.h b/qemu/hw/isa.h index 89b3004..c720f5e 100644 --- a/qemu/hw/isa.h +++ b/qemu/hw/isa.h @@ -1,5 +1,7 @@ /* ISA bus */ +#include "hw.h" + extern target_phys_addr_t isa_mem_base; int register_ioport_read(int start, int length, int size, diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c index 8a50096..59c2098 100644 --- a/qemu/hw/pc.c +++ b/qemu/hw/pc.c @@ -32,6 +32,7 @@ #include "smbus.h" #include "boards.h" #include "console.h" +#include "device-assignment.h" #include "qemu-kvm.h" @@ -1013,6 +1014,14 @@ static void pc_init1(ram_addr_t ram_size, int vga_ram_size, } } + /* Initialize assigned devices */ + if (pci_enabled) { + int r = -1; + do { + init_assigned_device(pci_bus, &r); + } while (r >= 0); + } + rtc_state = rtc_init(0x70, i8259[8]); qemu_register_boot_set(pc_boot_set, rtc_state); diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c index 07d37a8..e4e8386 100644 --- a/qemu/hw/pci.c +++ b/qemu/hw/pci.c @@ -50,6 +50,7 @@ struct PCIBus { static void pci_update_mappings(PCIDevice *d); static void pci_set_irq(void *opaque, int irq_num, int level); +void assigned_dev_update_irq(PCIDevice *d); target_phys_addr_t pci_mem_base; static int pci_irq_index; @@ -453,6 +454,12 @@ void pci_default_write_config(PCIDevice *d, val >>= 8; } +#ifdef KVM_CAP_DEVICE_ASSIGNMENT + if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() && + address >= 0x60 && address <= 0x63) + assigned_dev_update_irq(d); +#endif + end = address + len; if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) { /* if the command register is modified, we must modify the mappings */ @@ -560,6 +567,11 @@ static void pci_set_irq(void *opaque, int irq_num, int level) bus->set_irq(bus->irq_opaque, irq_num, bus->irq_count[irq_num] != 0); } +int pci_map_irq(PCIDevice *pci_dev, int pin) +{ + return pci_dev->bus->map_irq(pci_dev, pin); +} + /***********************************************************/ /* monitor info on PCI */ diff 
--git a/qemu/hw/pci.h b/qemu/hw/pci.h index 60e4094..e11fbbf 100644 --- a/qemu/hw/pci.h +++ b/qemu/hw/pci.h @@ -81,6 +81,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int region_num, uint32_t size, int type, PCIMapIORegionFunc *map_func); +int pci_map_irq(PCIDevice *pci_dev, int pin); uint32_t pci_default_read_config(PCIDevice *d, uint32_t address, int len); void pci_default_write_config(PCIDevice *d, diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c index 6fbf47b..dc12c8a 100644 --- a/qemu/hw/piix_pci.c +++ b/qemu/hw/piix_pci.c @@ -243,6 +243,25 @@ static void piix3_set_irq(qemu_irq *pic, int irq_num, int level) } } +int piix3_get_pin(int pic_irq) +{ + int i; + for (i = 0; i < 4; i++) + if (piix3_dev->config[0x60+i] == pic_irq) + return i; + return -1; +} + +int piix_get_irq(int pin) +{ + if (piix3_dev) + return piix3_dev->config[0x60+pin]; + if (piix4_dev) + return piix4_dev->config[0x60+pin]; + + return 0; +} + static void piix3_reset(PCIDevice *d) { uint8_t *pci_conf = d->config; diff --git a/qemu/qemu-kvm-x86.c b/qemu/qemu-kvm-x86.c index 5daedd1..5123e52 100644 --- a/qemu/qemu-kvm-x86.c +++ b/qemu/qemu-kvm-x86.c @@ -530,6 +530,9 @@ struct kvm_para_features { #ifdef KVM_CAP_CR3_CACHE { KVM_CAP_CR3_CACHE, KVM_FEATURE_CR3_CACHE }, #endif +#ifdef KVM_CAP_PV_DMA + { KVM_CAP_PV_DMA, KVM_FEATURE_DMA_OP }, +#endif { -1, -1 } }; diff --git a/qemu/vl.c b/qemu/vl.c index 022b3b8..bab720d 100644 --- a/qemu/vl.c +++ b/qemu/vl.c @@ -37,6 +37,7 @@ #include "qemu-char.h" #include "block.h" #include "audio/audio.h" +#include "hw/device-assignment.h" #include "migration.h" #include "balloon.h" #include "qemu-kvm.h" @@ -8478,6 +8479,12 @@ static void help(int exitcode) #endif "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n" "-no-kvm-pit disable KVM kernel mode PIT\n" +#if defined(TARGET_I386) || defined(TARGET_X86_64) + "-pcidevice host=bus:dev.func[,dma=none][,name=\"string\"]\n" + " expose a PCI device to the guest OS.\n" + " dma=none: don't perform any 
dma translations (default is to use an iommu)\n" + " 'string' is used in log output.\n" +#endif #endif #ifdef TARGET_I386 "-std-vga simulate a standard VGA card with VESA Bochs Extensions\n" @@ -8601,6 +8608,9 @@ enum { QEMU_OPTION_no_kvm, QEMU_OPTION_no_kvm_irqchip, QEMU_OPTION_no_kvm_pit, +#if defined(TARGET_I386) || defined(TARGET_X86_64) + QEMU_OPTION_pcidevice, +#endif QEMU_OPTION_no_reboot, QEMU_OPTION_no_shutdown, QEMU_OPTION_show_cursor, @@ -8689,6 +8699,9 @@ const QEMUOption qemu_options[] = { #endif { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip }, { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit }, +#if defined(TARGET_I386) || defined(TARGET_X86_64) + { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice }, +#endif #endif #if defined(TARGET_PPC) || defined(TARGET_SPARC) { "g", 1, QEMU_OPTION_g }, @@ -9595,6 +9608,11 @@ int main(int argc, char **argv) kvm_pit = 0; break; } +#if defined(TARGET_I386) || defined(TARGET_X86_64) + case QEMU_OPTION_pcidevice: + add_assigned_device(optarg); + break; +#endif #endif case QEMU_OPTION_usb: usb_enabled = 1; -- 1.5.4.3 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html