Amit Shah wrote: > From: Or Sagi <[EMAIL PROTECTED]> > From: Nir Peleg <[EMAIL PROTECTED]> > From: Amit Shah <[EMAIL PROTECTED]> > From: Glauber de Oliveira Costa <[EMAIL PROTECTED]> > > We can assign a device from the host machine to a guest. The original > code comes > from Neocleus. > > A new command-line option, -pcidevice is added. > For example, to invoke it for an Ethernet device sitting at > PCI bus:dev.fn 04:08.0 with host IRQ 18, use this: > > -pcidevice Ethernet/04:08.0-18 > > The host ethernet driver is to be removed before doing the > passthrough.
This procedure is rather crude. Do you have any plans to improve it? For example, the host could avoid loading drivers at boot for devices that will be assigned to guests, or, ideally, unbind the driver from the device dynamically before assignment. Randy (Weidong) > > If kvm uses the in-kernel irqchip, interrupts are routed to > the guest via the kvm module (accompanied kernel changes are > necessary). > If -no-kvm-irqchip is used, the 'irqhook' module, also included here, > is to be used. > > Signed-off-by: Amit Shah <[EMAIL PROTECTED]> > --- > Makefile | 10 +- > irqhook/Kbuild | 3 + > irqhook/Makefile | 25 ++ > irqhook/irqhook_main.c | 215 ++++++++++++++ > kernel/Makefile | 2 + > libkvm/libkvm-x86.c | 9 +- > libkvm/libkvm.h | 16 + > qemu/Makefile.target | 1 + > qemu/hw/apic.c | 4 + > qemu/hw/isa.h | 2 + > qemu/hw/pc.c | 4 + > qemu/hw/pci-passthrough.c | 704 > +++++++++++++++++++++++++++++++++++++++++++++ > qemu/hw/pci-passthrough.h | 119 ++++++++ qemu/hw/pci.c > | 13 + qemu/hw/pci.h | 1 + > qemu/hw/piix_pci.c | 19 ++ > qemu/vl.c | 16 + > tools/pci_barsize.c | 53 ++++ > tools/pci_mmio.c | 82 ++++++ > 19 files changed, 1293 insertions(+), 5 deletions(-) > create mode 100644 irqhook/Kbuild > create mode 100644 irqhook/Makefile > create mode 100644 irqhook/irqhook_main.c > create mode 100644 qemu/hw/pci-passthrough.c > create mode 100644 qemu/hw/pci-passthrough.h > create mode 100644 tools/pci_barsize.c > create mode 100644 tools/pci_mmio.c > > diff --git a/Makefile b/Makefile > index 48a8dff..d4246fd 100644 > --- a/Makefile > +++ b/Makefile > @@ -7,16 +7,16 @@ rpmrelease = devel > > sane-arch = $(subst i386,x86,$(subst x86_64,x86,$(ARCH))) > > -.PHONY: kernel user libkvm qemu bios vgabios extboot clean libfdt > +.PHONY: kernel irqhook user libkvm qemu bios vgabios extboot clean > libfdt > > all: libkvm qemu > ifneq '$(filter $(ARCH), x86_64 i386 ia64)' '' > - all: $(if $(WANT_MODULE), kernel) user > + all: $(if $(WANT_MODULE), kernel irqhook) user > endif > > kcmd = $(if 
$(WANT_MODULE),,@\#) > > -qemu kernel user libkvm: > +qemu kernel user irqhook libkvm: > $(MAKE) -C $@ > > qemu: libkvm > @@ -77,6 +77,7 @@ install-rpm: > > install: > $(kcmd)make -C kernel DESTDIR="$(DESTDIR)" install > + $(kcmd)make -C irqhook DESTDIR="$(DESTDIR)" install > make -C libkvm DESTDIR="$(DESTDIR)" install > make -C qemu DESTDIR="$(DESTDIR)" install > > @@ -97,6 +98,7 @@ srpm: > tar czf $(RPMTOPDIR)/SOURCES/user.tar.gz user > tar czf $(RPMTOPDIR)/SOURCES/libkvm.tar.gz libkvm > tar czf $(RPMTOPDIR)/SOURCES/kernel.tar.gz kernel > + tar czf $(RPMTOPDIR)/SOURCES/irqhook.tar.gz irqhook > tar czf $(RPMTOPDIR)/SOURCES/scripts.tar.gz scripts > tar czf $(RPMTOPDIR)/SOURCES/extboot.tar.gz extboot > cp Makefile configure kvm_stat $(RPMTOPDIR)/SOURCES > @@ -104,7 +106,7 @@ srpm: > $(RM) $(tmpspec) > > clean: > - for i in $(if $(WANT_MODULE), kernel) user libkvm qemu libfdt; do \ > + for i in $(if $(WANT_MODULE), kernel irqhook) user libkvm qemu > libfdt; do \ make -C $$i clean; \ > done > > diff --git a/irqhook/Kbuild b/irqhook/Kbuild > new file mode 100644 > index 0000000..9af75a4 > --- /dev/null > +++ b/irqhook/Kbuild > @@ -0,0 +1,3 @@ > +EXTRA_CFLAGS := -I$(src)/include > +obj-m := irqhook.o > +irqhook-objs := irqhook_main.o > diff --git a/irqhook/Makefile b/irqhook/Makefile > new file mode 100644 > index 0000000..3b1d851 > --- /dev/null > +++ b/irqhook/Makefile > @@ -0,0 +1,25 @@ > +include ../config.mak > + > +KVERREL = $(patsubst /lib/modules/%/build,%,$(KERNELDIR)) > + > +DESTDIR= > + > +INSTALLDIR = $(patsubst %/build,%/extra,$(KERNELDIR)) > + > +rpmrelease = devel > + > +LINUX = ../linux-2.6 > + > +all:: > + $(MAKE) -C $(KERNELDIR) M=`pwd` "$$@" > + > +#sync: > +# rsync --exclude='*.mod.c' "$(LINUX)"/drivers/irqhook/*.[ch] . 
> + > +install: > + mkdir -p $(DESTDIR)/$(INSTALLDIR) > + cp *.ko $(DESTDIR)/$(INSTALLDIR) > + /sbin/depmod -a > + > +clean: > + $(MAKE) -C $(KERNELDIR) M=`pwd` $@ > diff --git a/irqhook/irqhook_main.c b/irqhook/irqhook_main.c > new file mode 100644 > index 0000000..0f93d17 > --- /dev/null > +++ b/irqhook/irqhook_main.c > @@ -0,0 +1,215 @@ > +#include <linux/module.h> > +#include <linux/kernel.h> > +#include <linux/fs.h> > +#include <linux/bitmap.h> > +#include <linux/interrupt.h> > +#include <linux/spinlock.h> > +#include <linux/miscdevice.h> > +#include <linux/pci.h> > + > +#include <asm/uaccess.h> > + > +#define irqh_VERSION "0.0.1" > +#define irqh_MODULE_NAME "irqhook" > +#define irqh_DRIVER_NAME irqh_MODULE_NAME " HW IRQ hook " > irqh_VERSION + > +// based on earlier proprietary Tutis code; this modified version > goes under GPL +MODULE_AUTHOR("Nir Peleg - Tutis"); > +MODULE_DESCRIPTION("IRQ hook driver"); > +MODULE_LICENSE("GPL"); > + > +//#define irqh_DEBUG /* define to enable copious debugging info */ > + > +#ifdef irqh_DEBUG > +#define DPRINTK(fmt, args...) printk("<1>" "%s: " fmt, __FUNCTION__ > , ## args) +#else > +#define DPRINTK(fmt, args...) > +#endif > + > +#define ERROR(fmt, args...) 
printk("<1>" "%s: " fmt, __FUNCTION__ , > ## args) + > +static spinlock_t irqh_lock; > +static wait_queue_head_t irqh_proc_list; > + > +static DECLARE_BITMAP(pending, NR_IRQS); > +static DECLARE_BITMAP(handled, NR_IRQS); > + > +#define irqh_on(which, bit) test_bit(bit, which) > +#define irqh_set(which, bit) set_bit(bit, which) > +#define irqh_clear(which, bit) clear_bit(bit, which) > +#define irqh_ffs(which) find_first_bit(which, NR_IRQS) > + > +static irqreturn_t > +irqh_interrupt(int irq, void *p) > +{ > + unsigned long flags; > + > + DPRINTK("interrupt: %d\n", irq); > + if (!irqh_on(handled, irq)) > + return IRQ_HANDLED; > + spin_lock_irqsave(&irqh_lock, flags); > + irqh_set(pending, irq); > + wake_up_interruptible(&irqh_proc_list); > + spin_unlock_irqrestore(&irqh_lock, flags); > + disable_irq_nosync(irq); > + return IRQ_HANDLED; > +} > + > +static ssize_t > +irqh_dev_write(struct file *fp, const char *buf, size_t size, loff_t > *offp) +{ > + int n, device, func, devfn; > + char arg[32], *cp, *cp1; > + struct pci_dev *pdp = 0; > + > + DPRINTK("ENTER\n"); > + if ((fp->f_mode & FMODE_WRITE) == 0 || size > sizeof arg) > + return -EINVAL; > + > + if (size >= sizeof arg || copy_from_user(arg, buf, size)) > + return -EFAULT; > + arg[size] = 0; > + cp = arg + (arg[0] == '+' || arg[0] == '-'); > + n = simple_strtol(cp, &cp1, 0); > + if (*cp1 == ':') { > + device = simple_strtol(cp1+1, &cp1, 0); > + func = simple_strtol(cp1+1, NULL, 0); > + DPRINTK("PCI dev %d:%d.%d\n", n, device, func); > + devfn = PCI_DEVFN(device, func); > + for_each_pci_dev(pdp) { > + if (pdp->bus->number == n && pdp->devfn == devfn) { > + n = pdp->irq; > + goto found; > + } > + } > + ERROR("PCI device not found\n"); > + return -ENOENT; > + } > + found: > + DPRINTK("IRQ %d\n", n); > + if (arg[0] == '+') { > + if (pdp) { > + if (pci_enable_device(pdp)) > + ERROR("device not enabled\n"); > + if ((unsigned)(n = pdp->irq) >= NR_IRQS) { > + ERROR("device has invalid IRQ set\n"); > + return -EINVAL; > + } 
> + } > + if (irqh_on(handled, n)) > + return -EBUSY; > + if (request_irq(n, irqh_interrupt, IRQF_SHARED, irqh_MODULE_NAME, > (void *)irqh_interrupt)) { + ERROR("request_irq failed\n"); > + return -EIO; > + } > + printk("Bound machine irq %d\n", n); > + irqh_set(handled, n); > + goto done; > + } > + if ((unsigned)n >= NR_IRQS) > + return -EINVAL; > + if (arg[0] == '-') { > + if (pdp) > + pci_disable_device(pdp); > + free_irq(n, (void *)irqh_interrupt); > + irqh_clear(handled, n); > + } else > + enable_irq(n); > + > + done: > + DPRINTK("DONE\n"); > + return size; > +} > + > +static ssize_t > +irqh_dev_read(struct file *fp, char *buf, size_t size, loff_t *offp) > +{ > + char b[20]; > + int m = -ERESTARTSYS, n; > + > + DECLARE_WAITQUEUE(wait, current); > + > + DPRINTK("ENTER\n"); > + if ((fp->f_mode & FMODE_READ) == 0) > + return -EINVAL; > + spin_lock_irq(&irqh_lock); > + while (!signal_pending(current)) { > + if ((n = irqh_ffs(pending)) < NR_IRQS) { > + if ((m = sprintf(b, "%d", n) + 1) > size) > + m = size; > + if (copy_to_user(buf, b, m)) > + m = -EFAULT; > + else > + irqh_clear(pending, n); > + break; > + } > + if (fp->f_flags & O_NONBLOCK) { > + m = -EWOULDBLOCK; > + break; > + } > + add_wait_queue(&irqh_proc_list, &wait); > + set_current_state(TASK_INTERRUPTIBLE); > + spin_unlock_irq(&irqh_lock); > + schedule(); > + spin_lock_irq(&irqh_lock); > + current->state = TASK_RUNNING; > + remove_wait_queue(&irqh_proc_list, &wait); > + } > + spin_unlock_irq(&irqh_lock); > + return m; > +} > + > +static struct file_operations irqh_chrdev_ops = { > + owner: THIS_MODULE, > + read: irqh_dev_read, > + write: irqh_dev_write, > +}; > + > +#define irqh_MISCDEV_MINOR MISC_DYNAMIC_MINOR > + > +static struct miscdevice irqh_miscdev = { > + irqh_MISCDEV_MINOR, > + irqh_MODULE_NAME, > + &irqh_chrdev_ops, > +}; > + > +static int __init > +irqh_init(void) > +{ > + int rc; > + > + DPRINTK("ENTER\n"); > + > + if ((rc = misc_register(&irqh_miscdev))) { > + printk(KERN_ERR 
irqh_MODULE_NAME ": " "cannot register misc > device\n"); + DPRINTK("EXIT, returning %d\n", rc); > + return rc; > + } > + > + printk(KERN_INFO irqh_DRIVER_NAME " loaded\n"); > + > + init_waitqueue_head(&irqh_proc_list); > + spin_lock_init(&irqh_lock); > + > + DPRINTK("EXIT, returning 0\n"); > + return 0; > +} > + > +static void __exit > +irqh_cleanup(void) > +{ > + int n; > + > + DPRINTK("ENTER\n"); > + > + while ((n = irqh_ffs(handled)) < NR_IRQS) { > + irqh_clear(handled, n); > + free_irq(n, (void *)irqh_interrupt); > + } > + misc_deregister (&irqh_miscdev); > + > + DPRINTK("EXIT\n"); > +} > + > +module_init (irqh_init); > +module_exit (irqh_cleanup); > diff --git a/kernel/Makefile b/kernel/Makefile > index 69b4981..ebf180c 100644 > --- a/kernel/Makefile > +++ b/kernel/Makefile > @@ -46,6 +46,8 @@ header-sync: > rm -rf $T > rsync -R \ > "$(LINUX)"/./include/linux/kvm*.h \ > + "$(LINUX)"/./include/linux/ioport.h \ > + "$(LINUX)"/./include/linux/compiler.h \ > "$(LINUX)"/./include/asm-*/kvm*.h \ > $T/ > > diff --git a/libkvm/libkvm-x86.c b/libkvm/libkvm-x86.c > index d46fdcc..84abd6b 100644 > --- a/libkvm/libkvm-x86.c > +++ b/libkvm/libkvm-x86.c > @@ -126,6 +126,14 @@ static int kvm_init_tss(kvm_context_t kvm) > return 0; > } > > +#ifdef KVM_CAP_PCI_PASSTHROUGH > +int kvm_assign_pci_pt_device(kvm_context_t kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev) > +{ > + return ioctl(kvm->vm_fd, KVM_ASSIGN_PCI_PT_DEV, pci_pt_dev); > +} > +#endif > + > int kvm_arch_create_default_phys_mem(kvm_context_t kvm, > unsigned long phys_mem_bytes, > void **vm_mem) > @@ -431,7 +439,6 @@ void kvm_show_code(kvm_context_t kvm, int vcpu) > fprintf(stderr, "code:%s\n", code_str); > } > > - > /* > * Returns available msr list. User must free. 
> */ > diff --git a/libkvm/libkvm.h b/libkvm/libkvm.h > index 63183b8..0dd4334 100644 > --- a/libkvm/libkvm.h > +++ b/libkvm/libkvm.h > @@ -12,6 +12,7 @@ > #endif > > #include <linux/kvm.h> > +#include <linux/kvm_para.h> > > #include <signal.h> > > @@ -626,4 +627,19 @@ int kvm_enable_vapic(kvm_context_t kvm, int > vcpu, uint64_t vapic); > > #endif > > +#ifdef KVM_CAP_PCI_PASSTHROUGH > +/*! > + * \brief Notifies host kernel about assigning a PCI device to the > guest + * > + * Used for PCI passthrough, this function notifies the host kernel > + * about the assigning of the physical PCI device and the guest PCI > + * parameters. Also called when the guest updates the irq number for > a + * passthrough device > + * > + * \param kvm Pointer to the current kvm_context > + * \param pci_pt_dev Parameters like irq, PCI bus, devfn number, etc > + */ > +int kvm_assign_pci_pt_device(kvm_context_t kvm, > + struct kvm_pci_passthrough_dev *pci_pt_dev); > +#endif > #endif > diff --git a/qemu/Makefile.target b/qemu/Makefile.target > index 66c1fb1..1c9d445 100644 > --- a/qemu/Makefile.target > +++ b/qemu/Makefile.target > @@ -615,6 +615,7 @@ OBJS+= ide.o pckbd.o ps2.o vga.o $(SOUND_HW) dma.o > OBJS+= fdc.o mc146818rtc.o serial.o i8259.o i8254.o pcspk.o pc.o > OBJS+= cirrus_vga.o apic.o parallel.o acpi.o piix_pci.o > OBJS+= usb-uhci.o vmmouse.o vmport.o vmware_vga.o extboot.o > +OBJS+= pci-passthrough.o > ifeq ($(USE_KVM_PIT), 1) > OBJS+= i8254-kvm.o > endif > diff --git a/qemu/hw/apic.c b/qemu/hw/apic.c > index a14cab2..3f83e64 100644 > --- a/qemu/hw/apic.c > +++ b/qemu/hw/apic.c > @@ -23,6 +23,8 @@ > > #include "qemu-kvm.h" > > +#include "pci-passthrough.h" > + > //#define DEBUG_APIC > //#define DEBUG_IOAPIC > > @@ -389,6 +391,7 @@ static void apic_eoi(APICState *s) > /* XXX: send the EOI packet to the APIC bus to allow the I/O > APIC to set the remote IRR bit for level triggered > interrupts. 
*/ apic_update_irq(s); > + pt_ack_mirq(isrv); > } > > static void apic_get_delivery_bitmask(uint32_t *deliver_bitmask, > @@ -1144,6 +1147,7 @@ static void ioapic_mem_writel(void *opaque, > target_phys_addr_t addr, uint32_t va } else { > s->ioredtbl[index] &= ~0xffffffffULL; > s->ioredtbl[index] |= val; > + pt_set_vector(index, (val << 24) >> 24); > } > ioapic_service(s); > } > diff --git a/qemu/hw/isa.h b/qemu/hw/isa.h > index 89b3004..c720f5e 100644 > --- a/qemu/hw/isa.h > +++ b/qemu/hw/isa.h > @@ -1,5 +1,7 @@ > /* ISA bus */ > > +#include "hw.h" > + > extern target_phys_addr_t isa_mem_base; > > int register_ioport_read(int start, int length, int size, > diff --git a/qemu/hw/pc.c b/qemu/hw/pc.c > index da4a608..a9b7e60 100644 > --- a/qemu/hw/pc.c > +++ b/qemu/hw/pc.c > @@ -32,6 +32,7 @@ > #include "smbus.h" > #include "boards.h" > #include "console.h" > +#include "pci-passthrough.h" > > #include "qemu-kvm.h" > > @@ -997,6 +998,9 @@ static void pc_init1(ram_addr_t ram_size, int > vga_ram_size, } > } > > + /* Initialize pass-through */ > + pt_init(pci_bus); > + > rtc_state = rtc_init(0x70, i8259[8]); > > register_ioport_read(0x92, 1, 1, ioport92_read, NULL); > diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c > new file mode 100644 > index 0000000..b1413af > --- /dev/null > +++ b/qemu/hw/pci-passthrough.c > @@ -0,0 +1,704 @@ > +/* > + * Copyright (c) 2007, Neocleus Corporation. > + * > + * This program is free software; you can redistribute it and/or > modify it + * under the terms and conditions of the GNU General > Public License, + * version 2, as published by the Free Software > Foundation. + * > + * This program is distributed in the hope it will be useful, but > WITHOUT + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > General Public License for + * more details. 
> + * > + * You should have received a copy of the GNU General Public License > along with + * this program; if not, write to the Free Software > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA > 02111-1307 USA. + * > + * > + * Pass a PCI device from the host to a guest VM. > + * > + * Adapted for KVM by Qumranet. > + * > + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) > + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) > + */ > +#include <stdio.h> > +#include <pthread.h> > +#include <sys/io.h> > +#include <sys/ioctl.h> > +#include <linux/types.h> > + > +typedef __u64 resource_size_t; > +#define __deprecated > +#include <linux/ioport.h> > + > +#include "pci-passthrough.h" > +#include "irq.h" > + > +#include "qemu-kvm.h" > +#include <linux/kvm_para.h> > +extern kvm_context_t kvm_context; > +extern FILE *logfile; > + > +CPUReadMemoryFunc *pt_mmio_read_cb[3] = { > + pt_mmio_readb, > + pt_mmio_readw, > + pt_mmio_readl > +}; > + > +CPUWriteMemoryFunc *pt_mmio_write_cb[3] = { > + pt_mmio_writeb, > + pt_mmio_writew, > + pt_mmio_writel > +}; > + > +//#define PT_DEBUG > + > +#ifdef PT_DEBUG > +#define DEBUG(fmt, args...) fprintf(stderr, "%s: " fmt, __FUNCTION__ > , ## args) +#else > +#define DEBUG(fmt, args...) 
> +#endif > + > +#define pt_mmio_write(suffix, type) \ > +void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys, \ > + uint32_t value) \ > +{ \ > + pt_region_t *r_access = (pt_region_t *)opaque; \ > + void *r_virt = (u8 *)r_access->r_virtbase + \ > + (e_phys - r_access->e_physbase); \ > + if (r_access->debug & PT_DEBUG_MMIO) { \ > + fprintf(logfile, "pt_mmio_write" #suffix \ > + ": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \ > + (void *)r_access->e_physbase, (void *)e_phys, \ > + r_virt, value); \ > + } \ > + *(type *)r_virt = (type)value; \ > +} > + > +pt_mmio_write(b, u8) > +pt_mmio_write(w, u16) > +pt_mmio_write(l, u32) > + > +#define pt_mmio_read(suffix, type) \ > +uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t > e_phys) \ +{ \ > + pt_region_t *r_access = (pt_region_t *)opaque; \ > + void *r_virt = (u8 *)r_access->r_virtbase + \ > + (e_phys - r_access->e_physbase); \ > + uint32_t value = (u32) (*(type *) r_virt); \ > + if (r_access->debug & PT_DEBUG_MMIO) { \ > + fprintf(logfile, \ > + "pt_mmio_read" #suffix ": e_physbase=%p " \ > + "e_phys=%p r_virt=%p value=%08x\n", \ > + (void *)r_access->e_physbase, \ > + (void *)e_phys, r_virt, value); \ > + } \ > + return value; \ > +} > + > +pt_mmio_read(b, u8) > +pt_mmio_read(w, u16) > +pt_mmio_read(l, u32) > + > +#define pt_ioport_write(suffix) \ > +void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t > value) \ +{ \ > + pt_region_t *r_access = (pt_region_t *)opaque; \ > + uint32_t r_pio = (unsigned long)r_access->r_virtbase \ > + + (addr - r_access->e_physbase); \ > + if (r_access->debug & PT_DEBUG_PIO) { \ > + fprintf(logfile, "pt_ioport_write" #suffix \ > + ": r_pio=%08x e_physbase=%08x" \ > + " r_virtbase=%08lx value=%08x\n", \ > + r_pio, (int)r_access->e_physbase, \ > + (unsigned long)r_access->r_virtbase, value); \ > + } \ > + out##suffix(value, r_pio); \ > +} > + > +pt_ioport_write(b) > +pt_ioport_write(w) > +pt_ioport_write(l) > + > +#define 
pt_ioport_read(suffix) \ > +uint32_t pt_ioport_read##suffix(void *opaque, uint32_t addr) \ > +{ \ > + pt_region_t *r_access = (pt_region_t *)opaque; \ > + uint32_t r_pio = (addr - r_access->e_physbase) \ > + + (unsigned long)r_access->r_virtbase; \ > + uint32_t value = in##suffix(r_pio); \ > + if (r_access->debug & PT_DEBUG_PIO) { \ > + fprintf(logfile, "pt_ioport_read" #suffix \ > + ": r_pio=%08x e_physbase=%08x r_virtbase=%08lx "\ > + "value=%08x\n", \ > + r_pio, (int)r_access->e_physbase, \ > + (unsigned long)r_access->r_virtbase, value); \ > + } \ > + return (value); \ > +} > + > +pt_ioport_read(b) > +pt_ioport_read(w) > +pt_ioport_read(l) > + > +static void pt_iomem_map(PCIDevice * d, int region_num, > + uint32_t e_phys, uint32_t e_size, int type) > +{ > + pt_dev_t *r_dev = (pt_dev_t *) d; > + > + r_dev->v_addrs[region_num].e_physbase = e_phys; > + > + DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n", > + e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size, > + region_num); > + > + cpu_register_physical_memory(e_phys, > + r_dev->dev.io_regions[region_num].size, > + r_dev->v_addrs[region_num].memory_index); > +} > + > + > +static void pt_ioport_map(PCIDevice * pci_dev, int region_num, > + uint32_t addr, uint32_t size, int type) > +{ > + pt_dev_t *r_dev = (pt_dev_t *) pci_dev; > + int i; > + uint32_t ((*rf[])(void *, uint32_t)) = { pt_ioport_readb, > + pt_ioport_readw, > + pt_ioport_readl > + }; > + void ((*wf[])(void *, uint32_t, uint32_t)) = { pt_ioport_writeb, > + pt_ioport_writew, > + pt_ioport_writel > + }; > + > + r_dev->v_addrs[region_num].e_physbase = addr; > + DEBUG("pt_ioport_map: address=0x%x type=0x%x len=%d" > + "region_num=%d \n", addr, type, size, region_num); > + > + for (i = 0; i < 3; i++) { > + register_ioport_write(addr, size, 1<<i, wf[i], > + (void *) (r_dev->v_addrs + region_num)); > + register_ioport_read(addr, size, 1<<i, rf[i], > + (void *) (r_dev->v_addrs + region_num)); > + } > +} > + > +static void 
pt_pci_write_config(PCIDevice * d, uint32_t address, > uint32_t val, + int len) > +{ > + int fd; > + > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), (uint16_t) > address, + val, len); > + > + if (address == 0x4) > + pci_default_write_config(d, address, val, len); > + > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > + address == 0x3c || address == 0x3d) { > + /* used for update-mappings (BAR emulation) */ > + pci_default_write_config(d, address, val, len); > + return; > + } > + > + DEBUG("NON BAR (%x.%x): address=%04x val=0x%08x len=%d\n", > + ((d->devfn >> 3) & 0x1F), (d->devfn & 0x7), (uint16_t) > address, + val, len); > + fd = ((pt_dev_t *)d)->real_device.config_fd; > + lseek(fd, address, SEEK_SET); > + write(fd, &val, len); > +} > + > +static uint32_t pt_pci_read_config(PCIDevice *d, uint32_t address, > int len) +{ > + uint32_t val = 0; > + int fd; > + > + if ((address >= 0x10 && address <= 0x24) || address == 0x34 || > + address == 0x3c || address == 0x3d) { > + val = pci_default_read_config(d, address, len); > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, > + len); > + return (val); > + } > + > + /* vga specific, remove later */ > + if (address == 0xFC) > + goto do_log; > + > + fd = ((pt_dev_t *)d)->real_device.config_fd; > + lseek(fd, address, SEEK_SET); > + read(fd, &val, len); > + > + do_log: > + DEBUG("(%x.%x): address=%04x val=0x%08x len=%d\n", > + (d->devfn >> 3) & 0x1F, (d->devfn & 0x7), address, val, len); > + > + /* kill the special capabilities */ > + if (address == 4 && len == 4) > + val &= ~0x100000; > + else if (address == 6) > + val &= ~0x10; > + > + return (val); > +} > + > + > +static int pt_register_regions(pci_region_t * io_regions, > + unsigned long regions_num, pt_dev_t * pci_dev) > +{ > + uint32_t i; > + pci_region_t *cur_region = io_regions; > + > + for (i = 0; i < regions_num; i++, cur_region++) { 
> + if (!cur_region->valid) > + continue; > +#ifdef PT_DEBUG > + pci_dev->v_addrs[i].debug |= PT_DEBUG_MMIO | PT_DEBUG_PIO; > +#endif > + pci_dev->v_addrs[i].num = i; > + > + /* handle memory io regions */ > + if (cur_region->type & IORESOURCE_MEM) { > + int t = cur_region->type & IORESOURCE_PREFETCH > + ? PCI_ADDRESS_SPACE_MEM_PREFETCH > + : PCI_ADDRESS_SPACE_MEM; > + > + /* map physical memory */ > + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; > + pci_dev->v_addrs[i].r_virtbase = > + mmap(NULL, (cur_region->size + 0xFFF) & 0xFFFFF000, > + PROT_WRITE | PROT_READ, MAP_SHARED, > + cur_region->resource_fd, (off_t) 0); > + > + if ((void *) -1 == pci_dev->v_addrs[i].r_virtbase) { > + fprintf(logfile, "Error: Couldn't mmap 0x%x!\n", > + (uint32_t) (cur_region->base_addr)); > + return (-1); > + } > + > + /* add offset */ > + pci_dev->v_addrs[i].r_virtbase += > + (cur_region->base_addr & 0xFFF); > + > + pci_register_io_region((PCIDevice *) pci_dev, i, > + cur_region->size, t, > + pt_iomem_map); > + > + pci_dev->v_addrs[i].memory_index = > + cpu_register_io_memory(0, pt_mmio_read_cb, > + pt_mmio_write_cb, > + (void *) &(pci_dev->v_addrs[i])); > + > + continue; > + } > + /* handle port io regions */ > + > + pci_register_io_region((PCIDevice *) pci_dev, i, > + cur_region->size, PCI_ADDRESS_SPACE_IO, > + pt_ioport_map); > + > + pci_dev->v_addrs[i].e_physbase = cur_region->base_addr; > + pci_dev->v_addrs[i].r_virtbase = (void > *)(long)cur_region->base_addr; + pci_dev->v_addrs[i].memory_index = > 0; // not relevant for port io + } > + > + /* success */ > + return (0); > + > +} > + > +static int > +pt_get_real_device(pt_dev_t *pci_dev, uint8_t r_bus, uint8_t r_dev, > + uint8_t r_func) > +{ > + char dir[128], name[128], comp[16]; > + int fd, r = 0; > + FILE *f; > + unsigned long long start, end, size, flags; > + pci_region_t *rp; > + pci_dev_t *dev = &pci_dev->real_device; > + > + dev->region_number = 0; > + > + sprintf(dir, "/sys/bus/pci/devices/0000:%02x:%02x.%x/", 
> + r_bus, r_dev, r_func); > + strcpy(name, dir); > + strcat(name, "config"); > + if ((fd = open(name, O_RDWR)) == -1) { > + fprintf(logfile, "%s: %m\n", name); > + return 1; > + } > + dev->config_fd = fd; > + read(fd, pci_dev->dev.config, sizeof pci_dev->dev.config); > + > + strcpy(name, dir); > + strcat(name, "resource"); > + if ((f = fopen(name, "r")) == NULL) { > + fprintf(logfile, "%s: %m\n", name); > + return 1; > + } > + > + for (r = 0; fscanf(f, "%lli %lli %lli\n", &start, &end, &flags) == > 3; + r++) { > + rp = dev->regions + r; > + rp->valid = 0; > + size = end - start + 1; > + flags &= IORESOURCE_IO | IORESOURCE_MEM | IORESOURCE_PREFETCH; > + if (size == 0 || (flags & ~IORESOURCE_PREFETCH) == 0) > + continue; > + if (flags & IORESOURCE_MEM) { > + flags &= ~IORESOURCE_IO; > + sprintf(comp, "resource%d", r); > + strcpy(name, dir); > + strcat(name, comp); > + if ((fd = open(name, O_RDWR)) == -1) > + continue; // probably ROM > + rp->resource_fd = fd; > + } else > + flags &= ~IORESOURCE_PREFETCH; > + > + rp->type = flags; > + rp->valid = 1; > + rp->base_addr = start; > + rp->size = size; > + DEBUG("region %d size %d start 0x%x type %d " > + "resource_fd %d\n", r, rp->size, start, rp->type, > + rp->resource_fd); > + } > + fclose(f); > + > + dev->region_number = r; > + return 0; > +} > + > +static int pt_bind_mirq(int bus, int dev, int fn); > + > +static pt_dev_t *register_real_device(PCIBus * e_bus, const char > *e_dev_name, + int e_devfn, uint8_t r_bus, uint8_t r_dev, > + uint8_t r_func, uint32_t machine_irq) > +{ > + int rc; > + pt_dev_t *pci_dev; > + uint8_t e_device, e_intx; > + > + DEBUG("register_real_device: Registering real physical " > + "device %s (devfn=0x%x)\n", e_dev_name, e_devfn); > + > + pci_dev = (pt_dev_t *) pci_register_device(e_bus, e_dev_name, > + sizeof(pt_dev_t), e_devfn, > + pt_pci_read_config, > + pt_pci_write_config); > + > + if (NULL == pci_dev) { > + fprintf(logfile, "register_real_device: Error: Couldn't " > + "register real 
device %s\n", e_dev_name); > + return (NULL); > + } > + if (pt_get_real_device(pci_dev, r_bus, r_dev, r_func)) { > + fprintf(logfile, "register_real_device: Error: Couldn't get " > + "real device (%s)!\n", e_dev_name); > + return NULL; > + } > + > + /* handle real device's MMIO/PIO BARs */ > + if (pt_register_regions(pci_dev->real_device.regions, > + pci_dev->real_device.region_number, pci_dev)) > + return (NULL); > + > + /* handle interrupt routing */ > + e_device = (pci_dev->dev.devfn >> 3) & 0x1f; > + e_intx = pci_dev->dev.config[0x3d] - 1; > + pci_dev->intpin = e_intx; > + pci_dev->run = 0; > + pci_dev->mirq = machine_irq; > + pci_dev->girq = 0; > + > + /* bind machine_irq to device */ > + if (machine_irq && ((kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) > + || !kvm_enabled())) { > + DEBUG(logfile, "Binding mirq %u to device=0x%x intpin=0x%x\n", > + machine_irq, e_device, pci_dev->intpin); > + rc = pt_bind_mirq(r_bus, r_dev, r_func); > + if (rc) { > + fprintf(logfile, "pt_bind %d failed rc=%d\n", > + pci_dev->mirq, rc); > + return NULL; > + } > + sprintf(pci_dev->sirq, "%d", pci_dev->mirq); > + } > + > +#ifdef KVM_CAP_PCI_PASSTHROUGH > + if (kvm_enabled()) { > + struct kvm_pci_passthrough_dev pci_pt_dev; > + > + pci_pt_dev.guest.busnr = pci_bus_num(e_bus); > + pci_pt_dev.guest.devfn = PCI_DEVFN(e_device, r_func); > + pci_pt_dev.host.busnr = r_bus; > + pci_pt_dev.host.devfn = PCI_DEVFN(r_dev, r_func); > + > + if (qemu_kvm_irqchip_in_kernel()) { > + /* We'll set the value of the guest irq as and when > + * the piix config gets updated. 
See pci_pt_update_irq > + */ > + pci_pt_dev.guest.irq = 0; > + pci_pt_dev.host.irq = machine_irq; > + } > + rc = kvm_assign_pci_pt_device(kvm_context, &pci_pt_dev); > + if (rc < 0) { > + fprintf(stderr, "Could not notify kernel about " > + "passthrough device\n"); > + perror("pt-ioctl"); > + return NULL; > + } > + } > +#endif > + > + fprintf(logfile, "Registered host PCI device %02x:%02x.%1x-%u " > + "as guest device %02x:%02x.%1x\n", > + r_bus, r_dev, r_func, machine_irq, > + pci_bus_num(e_bus), e_device, r_func); > + > + return (pci_dev); > +} > + > +#define MAX_PTDEVS 4 > +struct { > + char name[128]; > + int bus; > + int dev; > + int func; > + int irq; > + pt_dev_t *ptdev; > +} ptdevs[MAX_PTDEVS]; > + > +int nptdevs; > +extern int piix_get_irq(int); > + > +#ifdef KVM_CAP_PCI_PASSTHROUGH > +/* The pci config space got updated. Check if irq numbers have > changed + * for our devices > + */ > +void pci_pt_update_irq(PCIDevice *d) > +{ > + int i, irq, r; > + pt_dev_t *pt_dev; > + > + for (i = 0; i < nptdevs; i++) { > + pt_dev = ptdevs[i].ptdev; > + if (pt_dev == NULL) > + continue; > + > + irq = pci_map_irq(&pt_dev->dev, pt_dev->intpin); > + irq = piix_get_irq(irq); > + if (irq != pt_dev->girq) { > + struct kvm_pci_passthrough_dev pci_pt_dev; > + > + memset(&pci_pt_dev, 0, sizeof(pci_pt_dev)); > + pci_pt_dev.guest.irq = irq; > + pci_pt_dev.host.irq = pt_dev->mirq; > + r = kvm_assign_pci_pt_device(kvm_context, &pci_pt_dev); > + if (r < 0) { > + perror("pci_pt_update_irq"); > + continue; > + } > + pt_dev->girq = irq; > + } > + } > +} > +#endif > + > +static QEMUBH *ptbh; > +static int irqfd; > +static pt_dev_t **apicv[0xfe]; //0x10 - 0xfe according to intel > IOAPIC spec +#define IRQHOOK_DEV "/dev/irqhook" > +static pthread_t irqthread; > + > +static void *pt_irq(void *arg) > +{ > + char buf[20]; > + int irq; > + int i; > + pt_dev_t *dev; > + sigset_t signals; > + > + sigfillset(&signals); > + sigprocmask(SIG_BLOCK, &signals, NULL); > + > + if (!irqfd) { > + 
fprintf(stderr, "pt_irq: irqfd %d, exiting\n", irqfd); > + exit(-1); > + } > + > + for (;;) { > + if (read(irqfd, buf, 20) == -1) { > + if (errno == EINTR) > + continue; > + perror("irq read"); > + break; > + } > + > + irq = atoi(buf); > + DEBUG("read irq %d\n", irq); > + if (!irq) continue; > + > + for (i = 0; i < nptdevs; i++) > + if ((dev = ptdevs[i].ptdev) && dev->mirq == irq) > + dev->run = 1; > + qemu_bh_schedule(ptbh); > + } > + return NULL; > +} > + > +static void pt_bh(void *p) > +{ > + int i; > + pt_dev_t *dev; > + for (i = 0; i < nptdevs; i++) > + if ((dev = ptdevs[i].ptdev) && dev->run) { > + qemu_set_irq(dev->dev.irq[dev->intpin], 1); > + dev->run = 0; > + if (cpu_single_env) > + cpu_interrupt(cpu_single_env, CPU_INTERRUPT_EXIT); > + } > +} > + > +int pt_init(PCIBus * bus) > +{ > + pt_dev_t *dev = NULL; > + int i, ret = 0; > + > + iopl(3); > + > + if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) { > + if (!(ptbh = qemu_bh_new(pt_bh, 0))) { > + fprintf(logfile, "Couldn't register PT callback\n"); > + return -1; > + } > + > + if (!(irqfd = open(IRQHOOK_DEV, O_RDWR))) { > + fprintf(logfile, "Couldn't open PT irqhook dev, make " > + "sure the irqhook module is loaded\n"); > + return -1; > + } > + } > + > + for (i = 0; i < nptdevs; i++) { > + dev = register_real_device(bus, ptdevs[i].name, -1, > + ptdevs[i].bus, ptdevs[i].dev, > + ptdevs[i].func, ptdevs[i].irq); > + > + if (dev == NULL) { > + fprintf(logfile, "Error: Couldn't register device %s\n", > + ptdevs[i].name); > + ret = -1; > + } > + ptdevs[i].ptdev = dev; > + } > + > + if (kvm_enabled() && !qemu_kvm_irqchip_in_kernel()) > + if (pthread_create(&irqthread, 0, pt_irq, dev)) { > + fprintf(logfile, "Couldn't create IRQ thread\n"); > + return -1; > + } > + > + /* success */ > + return (ret); > +} > + > +void add_pci_passthrough_device(const char *arg) > +{ > + /* name/bus:dev.func-intr */ > + char *cp, *cp1; > + > + if (nptdevs >= MAX_PTDEVS) { > + fprintf(logfile, "Too many passthrough devices (max 
%d)\n", > + MAX_PTDEVS); > + return; > + } > + strcpy(ptdevs[nptdevs].name, arg); > + cp = strchr(ptdevs[nptdevs].name, '/'); > + if (cp == NULL) > + goto bad; > + *cp++ = 0; > + > + ptdevs[nptdevs].bus = strtoul(cp, &cp1, 16); > + if (*cp1 != ':') > + goto bad; > + cp = cp1 + 1; > + > + ptdevs[nptdevs].dev = strtoul(cp, &cp1, 16); > + if (*cp1 != '.') > + goto bad; > + cp = cp1 + 1; > + > + ptdevs[nptdevs].func = strtoul(cp, &cp1, 16); > + if (*cp1 != '-') > + goto bad; > + cp = cp1 + 1; > + > + ptdevs[nptdevs].irq = strtoul(cp, &cp1, 0); > + if (*cp1 != 0) > + goto bad; > + > + nptdevs++; > + return; > + bad: > + fprintf(logfile, "passthrough arg (%s) not in the form of " > + "name/bus:dev.func-intr\n", arg); > +} > + > +void pt_ack_mirq(int vector) > +{ > + pt_dev_t **p = apicv[vector]; > + if (!p) return; > + > + for (; *p; *p++) { > + write(irqfd, (*p)->sirq, strlen((*p)->sirq)); > + qemu_set_irq((*p)->dev.irq[(*p)->intpin], 0); > + } > +} > + > +static int pt_bind_mirq(int bus, int dev, int fn) > +{ > + char s[64]; > + sprintf(s, "+%d:%d.%d", bus, dev, fn); > + if (write(irqfd, s, strlen(s)) != strlen(s)) { > + perror("pt_bind_mirq"); > + fprintf(logfile, "Make sure the irqhook module is loaded\n"); > + exit(-1); > + } > + return 0; > +} > + > +int piix3_get_pin(int pic_irq); > + > +void pt_set_vector(int irq, int vector) > +{ > + int i, j; > + int pin = piix3_get_pin(irq); > + pt_dev_t *pt, **p; > + > + DEBUG("irq %d vector %d\n", irq, vector); > + if (vector > 0xfe) return; > + for (i = 0; i < nptdevs; i++) { > + pt = ptdevs[i].ptdev; > + if (!pt || pt->bound) > + continue; > + if (pci_map_irq(&pt->dev, pt->intpin) == pin) { > + for (j = 1, p = apicv[vector]; p; j++, *p++) > + ; > + apicv[vector] = realloc(apicv[vector], j * sizeof pt); > + p = &apicv[vector][j]; > + *(p-1) = pt; > + *p = 0; > + pt->bound = 1; > + } > + } > + DEBUG("done\n"); > +} > diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h > new file mode 100644 > index 
0000000..564b989 > --- /dev/null > +++ b/qemu/hw/pci-passthrough.h > @@ -0,0 +1,119 @@ > +/* > + * Copyright (c) 2007, Neocleus Corporation. > + * Copyright (c) 2007, Intel Corporation. > + * > + * This program is free software; you can redistribute it and/or > modify it + * under the terms and conditions of the GNU General > Public License, + * version 2, as published by the Free Software > Foundation. + * > + * This program is distributed in the hope it will be useful, but > WITHOUT + * ANY WARRANTY; without even the implied warranty of > MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU > General Public License for + * more details. > + * > + * You should have received a copy of the GNU General Public License > along with + * this program; if not, write to the Free Software > Foundation, Inc., 59 Temple + * Place - Suite 330, Boston, MA > 02111-1307 USA. + * > + * Data structures for storing PCI state > + * > + * Adapted to kvm by Qumranet > + * > + * Copyright (c) 2007, Neocleus, Alex Novik ([EMAIL PROTECTED]) > + * Copyright (c) 2007, Neocleus, Guy Zana ([EMAIL PROTECTED]) > + * Copyright (C) 2008, Qumranet, Amit Shah ([EMAIL PROTECTED]) > + */ > + > +#ifndef __PCI_PASSTHROUGH_H__ > +#define __PCI_PASSTHROUGH_H__ > + > +#include <sys/mman.h> > +#include "qemu-common.h" > +#include "pci.h" > +#include <linux/types.h> > + > +typedef __u8 u8; > +typedef __u16 u16; > +typedef __u32 u32; > +typedef __u64 u64; > + > +#define PT_DEBUG_PIO (0x01) > +#define PT_DEBUG_MMIO (0x02) > + > +/* From include/linux/pci.h in the kernel sources */ > +#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & > 0x07)) + > +typedef u32 pciaddr_t; > + > +#define MAX_IO_REGIONS (6) > + > +typedef struct pci_region_s { > + int type; /* Memory or port I/O */ > + int valid; > + pciaddr_t base_addr; > + pciaddr_t size; /* size of the region */ > + int resource_fd; > +} pci_region_t; > + > +typedef struct pci_dev_s { > + u8 bus, dev, func; /* Bus inside domain, 
device and function */ > + int irq; /* IRQ number */ > + u16 region_number; /* number of active regions */ > + > + /* Port I/O or MMIO Regions */ > + pci_region_t regions[MAX_IO_REGIONS]; > + int config_fd; > +} pci_dev_t; > + > +typedef struct pt_region_s { > + target_phys_addr_t e_physbase; > + uint32_t memory_index; > + void *r_virtbase; /* mmapped access address */ > + int num; /* our index within v_addrs[] */ > + uint32_t debug; > +} pt_region_t; > + > +typedef struct pt_dev_s { > + PCIDevice dev; > + int intpin; > + uint8_t debug_flags; > + pt_region_t v_addrs[PCI_NUM_REGIONS]; > + pci_dev_t real_device; > + int run; > + int mirq; > + int girq; > + char sirq[4]; > + int bound; > +} pt_dev_t; > + > +/* MMIO access functions */ > +uint32_t pt_mmio_readb(void *opaque, target_phys_addr_t e_phys); > +uint32_t pt_mmio_readw(void *opaque, target_phys_addr_t e_phys); > +uint32_t pt_mmio_readl(void *opaque, target_phys_addr_t e_phys); > +void pt_mmio_writeb(void *opaque, target_phys_addr_t e_phys, > uint32_t value); +void pt_mmio_writew(void *opaque, > target_phys_addr_t e_phys, uint32_t value); +void pt_mmio_writel(void > *opaque, target_phys_addr_t e_phys, uint32_t value); + > +/* PIO access functions */ > +uint32_t pt_ioport_readb(void *opaque, uint32_t addr); > +uint32_t pt_ioport_readw(void *opaque, uint32_t addr); > +uint32_t pt_ioport_readl(void *opaque, uint32_t addr); > +void pt_ioport_writeb(void *opaque, uint32_t addr, uint32_t value); > +void pt_ioport_writew(void *opaque, uint32_t addr, uint32_t value); > +void pt_ioport_writel(void *opaque, uint32_t addr, uint32_t value); > + > +/* Registration functions */ > +int register_pt_pio_region(uint32_t pio_start, uint32_t length, > + uint8_t do_logging); > +int register_pt_mmio_region(uint32_t mmio_addr, uint32_t length, > + uint8_t do_logging); > + > +/* Initialization functions */ > +int pt_init(PCIBus * bus); > +void add_pci_passthrough_device(const char *arg); > +void pt_set_vector(int irq, int vector); > 
+void pt_ack_mirq(int vector); > + > +#define logfile stderr > + > +#endif /* __PCI_PASSTHROUGH_H__ */ > diff --git a/qemu/hw/pci.c b/qemu/hw/pci.c > index a23a466..1a2b1fa 100644 > --- a/qemu/hw/pci.c > +++ b/qemu/hw/pci.c > @@ -26,6 +26,7 @@ > #include "console.h" > #include "net.h" > #include "pc.h" > +#include "qemu-kvm.h" > > //#define DEBUG_PCI > > @@ -49,6 +50,7 @@ struct PCIBus { > > static void pci_update_mappings(PCIDevice *d); > static void pci_set_irq(void *opaque, int irq_num, int level); > +void pci_pt_update_irq(PCIDevice *d); > > target_phys_addr_t pci_mem_base; > static int pci_irq_index; > @@ -449,6 +451,12 @@ void pci_default_write_config(PCIDevice *d, > val >>= 8; > } > > +#ifdef KVM_CAP_PCI_PASSTHROUGH > + if (kvm_enabled() && qemu_kvm_irqchip_in_kernel() && > + address >= 0x60 && address <= 0x63) > + pci_pt_update_irq(d); > +#endif > + > end = address + len; > if (end > PCI_COMMAND && address < (PCI_COMMAND + 2)) { > /* if the command register is modified, we must modify the > mappings */ @@ -551,6 +559,11 @@ static void pci_set_irq(void > *opaque, int irq_num, int level) bus->set_irq(bus->irq_opaque, > irq_num, bus->irq_count[irq_num] != 0); } > > +int pci_map_irq(PCIDevice *pci_dev, int pin) > +{ > + return pci_dev->bus->map_irq(pci_dev, pin); > +} > + > /***********************************************************/ > /* monitor info on PCI */ > > diff --git a/qemu/hw/pci.h b/qemu/hw/pci.h > index 60e4094..e11fbbf 100644 > --- a/qemu/hw/pci.h > +++ b/qemu/hw/pci.h > @@ -81,6 +81,7 @@ void pci_register_io_region(PCIDevice *pci_dev, int > region_num, uint32_t size, int type, > PCIMapIORegionFunc *map_func); > > +int pci_map_irq(PCIDevice *pci_dev, int pin); > uint32_t pci_default_read_config(PCIDevice *d, > uint32_t address, int len); > void pci_default_write_config(PCIDevice *d, > diff --git a/qemu/hw/piix_pci.c b/qemu/hw/piix_pci.c > index 90cb3a6..112381a 100644 > --- a/qemu/hw/piix_pci.c > +++ b/qemu/hw/piix_pci.c > @@ -237,6 +237,25 @@ 
static void piix3_set_irq(qemu_irq *pic, int > irq_num, int level) } > } > > +int piix3_get_pin(int pic_irq) > +{ > + int i; > + for (i = 0; i < 4; i++) > + if (piix3_dev->config[0x60+i] == pic_irq) > + return i; > + return -1; > +} > + > +int piix_get_irq(int pin) > +{ > + if (piix3_dev) > + return piix3_dev->config[0x60+pin]; > + if (piix4_dev) > + return piix4_dev->config[0x60+pin]; > + > + return 0; > +} > + > static void piix3_reset(PCIDevice *d) > { > uint8_t *pci_conf = d->config; > diff --git a/qemu/vl.c b/qemu/vl.c > index 7e4dce1..136119e 100644 > --- a/qemu/vl.c > +++ b/qemu/vl.c > @@ -37,6 +37,7 @@ > #include "qemu-char.h" > #include "block.h" > #include "audio/audio.h" > +#include "hw/pci-passthrough.h" > #include "migration.h" > #include "qemu-kvm.h" > > @@ -7790,6 +7791,10 @@ static void help(int exitcode) > #endif > "-no-kvm-irqchip disable KVM kernel mode PIC/IOAPIC/LAPIC\n" > "-no-kvm-pit disable KVM kernel mode PIT\n" > +#if defined(TARGET_I386) || defined(TARGET_X86_64) > + "-pcidevice name/bus:dev.func-intr \n" > + " expose a PCI device to the guest OS\n" > +#endif > #endif > #ifdef TARGET_I386 > "-std-vga simulate a standard VGA card with VESA > Bochs Extensions\n" @@ -7914,6 +7919,9 @@ enum { > QEMU_OPTION_no_kvm, > QEMU_OPTION_no_kvm_irqchip, > QEMU_OPTION_no_kvm_pit, > +#if defined(TARGET_I386) || defined(TARGET_X86_64) > + QEMU_OPTION_pcidevice, > +#endif > QEMU_OPTION_no_reboot, > QEMU_OPTION_no_shutdown, > QEMU_OPTION_show_cursor, > @@ -8002,6 +8010,9 @@ const QEMUOption qemu_options[] = { > #endif > { "no-kvm-irqchip", 0, QEMU_OPTION_no_kvm_irqchip }, > { "no-kvm-pit", 0, QEMU_OPTION_no_kvm_pit }, > +#if defined(TARGET_I386) || defined(TARGET_X86_64) > + { "pcidevice", HAS_ARG, QEMU_OPTION_pcidevice }, > +#endif > #endif > #if defined(TARGET_PPC) || defined(TARGET_SPARC) > { "g", 1, QEMU_OPTION_g }, > @@ -8904,6 +8915,11 @@ int main(int argc, char **argv) > kvm_pit = 0; > break; > } > +#if defined(TARGET_I386) || defined(TARGET_X86_64) 
> + case QEMU_OPTION_pcidevice: > + add_pci_passthrough_device(optarg); > + break; > +#endif > #endif > case QEMU_OPTION_usb: > usb_enabled = 1; > diff --git a/tools/pci_barsize.c b/tools/pci_barsize.c > new file mode 100644 > index 0000000..dd230c9 > --- /dev/null > +++ b/tools/pci_barsize.c > @@ -0,0 +1,53 @@ > +#include <stdio.h> > +#include <sys/types.h> > +#include <unistd.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <stdlib.h> > + > +int > +panic(char *msg) > +{ > + perror(msg); > + exit(1); > +} > + > +int > +main(int argc, char **argv) > +{ > + unsigned l, b, sz; > + int fd, ismem, bar = 0, offs; > + > + if (argc < 2) > + panic("usage: pci_barsize <file> [bar no]"); > + > + if ((fd = open(argv[1], O_RDWR)) < 0) > + panic("open"); > + > + if (argc > 2) > + bar = strtoul(argv[2], 0, 0); > + if (bar < 0 || bar > 5) > + panic("bar range 0-5"); > + > + offs = 0x10 + bar * 4; > + lseek(fd, offs, 0); > + read(fd, &l, sizeof(l)); > + printf("bar %d (offs 0x%x) == %x\n", bar, offs, l); > + > + ismem = !(l & 0x01); > + > + b = ~0; > + lseek(fd, offs, 0); > + write(fd, &b, sizeof(b)); > + > + lseek(fd, offs, 0); > + read(fd, &b, sizeof(b)); > + sz = ~(b & (ismem ? ~0x15 : ~0x1)) + 1; > + printf("bar %d %s size 0x%x == %ldKB (%x)\n", > + bar, ismem ? 
"memory" : "IO", sz, sz / 1024, b); > + > + lseek(fd, offs, 0); > + write(fd, &l, sizeof(l)); > + > + return 0; > +} > diff --git a/tools/pci_mmio.c b/tools/pci_mmio.c > new file mode 100644 > index 0000000..6e91571 > --- /dev/null > +++ b/tools/pci_mmio.c > @@ -0,0 +1,82 @@ > +#include <stdio.h> > +#include <sys/types.h> > +#include <unistd.h> > +#include <sys/stat.h> > +#include <fcntl.h> > +#include <stdlib.h> > +#include <sys/mman.h> > + > +int > +panic(char *msg) > +{ > + perror(msg); > + exit(1); > +} > + > +int > +main(int argc, char **argv) > +{ > + unsigned sz; > + int fd, cnt, rsz, offs = 0; > + void *map; > + struct stat st; > + > + if (argc < 2) > + panic("usage: pci_mmio <resouce-file> [offset [count]]"); > + > + if ((fd = open(argv[1], O_RDWR)) < 0) > + panic("open"); > + > + if (fstat(fd, &st) < 0) > + panic("fstat"); > + cnt = sz = st.st_size; > + > + if (argc > 2) > + offs = strtoul(argv[2], 0, 0); > + if (argc > 3) > + cnt = strtoul(argv[3], 0, 0); > + > + if (cnt < 0 || cnt > sz) > + panic("bad count"); > + if (offs < 0 || offs > sz) > + panic("bad offset"); > + if (offs + cnt > sz) { > + cnt = sz - offs; > + fprintf(stderr, "count truncated to %d", cnt); > + } > + if (cnt > 4 && offs % 4) > + panic("read bigger than 4 must be 4 bytes aligned"); > + if (cnt == 2 && offs % 2) > + panic("2 bytes read must be 2 bytes aligned"); > + if (cnt != 1 && cnt != 2 && cnt != 4 && cnt % 4) > + panic("counts must be 1, 2, 4 or 4*n"); > + > + fprintf(stderr, "reading %s [%d:%d]\n", argv[1], offs, offs + cnt); > + map = mmap(NULL, sz, PROT_WRITE | PROT_READ, MAP_SHARED, fd, 0); > + > + if (!map) > + panic("mmap"); > + > + rsz = cnt > 4 ? 
4 : cnt; > + fprintf(stderr, "rsz: %d cnt %d\n", rsz, cnt); > + while (cnt > 0) { > + char buf[8]; > + switch (rsz) { > + case 1: > + *(char *)buf = *(char *)map + offs; > + break; > + case 2: > + *(short *)buf = *(short *)map + offs/sizeof(short); > + break; > + case 4: > + *(int *)buf = *(int *)map + offs/4; > + break; > + } > + write(1, buf, rsz); > + > + offs += rsz; > + cnt -= rsz; > + } > + fprintf(stderr, "done\n"); > + return 0; > +} > -- > 1.5.5.1 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html